diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..03d0dfd7a05762d5d6e735a737b4e5d61041c7f8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a0f59b7b91d61179514d6e990a01e588f1af99b0 --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.0 \ No newline at end of file diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..29e0b3a4b795e316b1a7ba9b7dc790302a9d6e0f --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "gate_proj", + "o_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": 
null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74b40a870465dd4072b95e51e12453f81cea03d7 --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f1cd3c7f000e6316e8e81f9e6a6b6e655d42261c0e2629705779bb2459da58 +size 167832688 diff --git a/checkpoint-200/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-200/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf1531dbfdf041a6dddf47dd4b9e9f490b645e69 --- /dev/null +++ b/checkpoint-200/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8c4f4239855212ca0db1765261ecb116a44a964b17f21b4a6f2206be7168e2f +size 72284496 diff --git a/checkpoint-200/global_step200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/checkpoint-200/global_step200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac949177c78b16f59f6f87da1a0a64a87a4b354a --- /dev/null +++ b/checkpoint-200/global_step200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2789c21e98e766a093aa2cb2461d3f7f50e4046758714682d4a35ad051160848 +size 72284496 diff --git a/checkpoint-200/global_step200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/checkpoint-200/global_step200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e01312578071633790c332da02ce9b5a2c315160 --- /dev/null +++ b/checkpoint-200/global_step200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:fc7a967ba9f0aa4fa676cc7122fbe17e1dcc2e5c9e827ccffebde75674817c38 +size 72284496 diff --git a/checkpoint-200/global_step200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/checkpoint-200/global_step200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..02797d49a884284e29dedbddcbaa3a32456541e7 --- /dev/null +++ b/checkpoint-200/global_step200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51014d562689622606388a2d5b76966fe1b72010d4a2078ef2446d05c74fc3b9 +size 72284496 diff --git a/checkpoint-200/global_step200/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/checkpoint-200/global_step200/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..37627f823bc2802c13a44c8c8cba68d39d6aa402 --- /dev/null +++ b/checkpoint-200/global_step200/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3d2a3d2feb260beeb935bdd769a575e283663381ec88b4f63946220f83011b +size 72284496 diff --git a/checkpoint-200/global_step200/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/checkpoint-200/global_step200/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..11c82e3f74cd4e71e5ad7a8dea226f1625a8fe65 --- /dev/null +++ b/checkpoint-200/global_step200/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e0ce3aa18ed46a547b62e29ca307b71e7979430b5bcce2460c6c350b4d6802f +size 72284496 diff --git a/checkpoint-200/global_step200/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/checkpoint-200/global_step200/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..460f88af5d98ac812b6abe9b9f78f2bb3176efc3 --- /dev/null +++ 
b/checkpoint-200/global_step200/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:624cf125b33e4c71ae3dfaad3509442f11f14d5f68ee0e29df319d8c932262e8 +size 72284496 diff --git a/checkpoint-200/global_step200/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-200/global_step200/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2dd6b0459124bec6c1d0155c4ae3d9aa4bab05a --- /dev/null +++ b/checkpoint-200/global_step200/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f262d283f5ae3f36d754b33518c9db629e74b210198a43f75f863272e6b3eb +size 443182 diff --git a/checkpoint-200/global_step200/zero_pp_rank_1_mp_rank_00_model_states.pt b/checkpoint-200/global_step200/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebdc9474cd1c2879f021ea28c2aa7d6e1f50e52f --- /dev/null +++ b/checkpoint-200/global_step200/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:732dd90f1cab1d43aac02ebb1302fd8dec66d7adcfd3694ef578e07070d9c140 +size 443182 diff --git a/checkpoint-200/global_step200/zero_pp_rank_2_mp_rank_00_model_states.pt b/checkpoint-200/global_step200/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1080c6d294be4fd14611b1b3e1a23c0bcdce34b7 --- /dev/null +++ b/checkpoint-200/global_step200/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c2ac750a14d59ce641530822b69fc398b3d45eb40da21769e2c492838ca4f2d +size 443182 diff --git a/checkpoint-200/global_step200/zero_pp_rank_3_mp_rank_00_model_states.pt b/checkpoint-200/global_step200/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..c5ce0dc2011886f7b6a0a7c6099c0c53b127372f --- /dev/null +++ b/checkpoint-200/global_step200/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe19e077ea086d4c5aa6b5d2c4c8e9a299060b9c19b367776969b50443dabbb +size 443182 diff --git a/checkpoint-200/global_step200/zero_pp_rank_4_mp_rank_00_model_states.pt b/checkpoint-200/global_step200/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe99fac5056bdb223728243eb9163fbdbb8c2258 --- /dev/null +++ b/checkpoint-200/global_step200/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df13066170e29f80fb40b1c4b55ab35d43e68c59edf0323589b9bc9992b582e9 +size 443182 diff --git a/checkpoint-200/global_step200/zero_pp_rank_5_mp_rank_00_model_states.pt b/checkpoint-200/global_step200/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d24908310917b03cee09067a2c298eb36610aa1f --- /dev/null +++ b/checkpoint-200/global_step200/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f44b604569fad50ce708c749c67ed4f917424c2765961030901a86441256071 +size 443182 diff --git a/checkpoint-200/global_step200/zero_pp_rank_6_mp_rank_00_model_states.pt b/checkpoint-200/global_step200/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..91a139edd9d6d97f577993c5245a2053e88b87a8 --- /dev/null +++ b/checkpoint-200/global_step200/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64512dcfc2eb383402c182c7e90e4df6571d9a9d155d32e87ef099db8205ede +size 443182 diff --git a/checkpoint-200/latest b/checkpoint-200/latest new file mode 100644 index 
0000000000000000000000000000000000000000..753e24e10f3a2489150f458205cf759fd8b6081f --- /dev/null +++ b/checkpoint-200/latest @@ -0,0 +1 @@ +global_step200 \ No newline at end of file diff --git a/checkpoint-200/rng_state_0.pth b/checkpoint-200/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..c1a9ec9a7728d9bf732b53099c887fac5b216291 --- /dev/null +++ b/checkpoint-200/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80843ed4257159510cfe5df3f2214b9a4d650a6937591f2a3495d204b158730 +size 15728 diff --git a/checkpoint-200/rng_state_1.pth b/checkpoint-200/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e1c244751f5f6eeac08c755b85d53e07898d5f9 --- /dev/null +++ b/checkpoint-200/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e236489ae5d8e31a7f57f7d0511639295147693f80740ac66efaac2b5cc9ca56 +size 15728 diff --git a/checkpoint-200/rng_state_2.pth b/checkpoint-200/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..9018cf2a9a6e2115c15b06223f116d60ceaa933c --- /dev/null +++ b/checkpoint-200/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9467711bd05c583e935f57f605df794c5430a96d78cd9c3f2b9ac08286c40a78 +size 15728 diff --git a/checkpoint-200/rng_state_3.pth b/checkpoint-200/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..c00711c0688086fb08a1d05d29dc6dfa69955269 --- /dev/null +++ b/checkpoint-200/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d7549f4b04532ca4315205821a6c3c3309b35f8cc4f5436c75c04f2f8c613f +size 15792 diff --git a/checkpoint-200/rng_state_4.pth b/checkpoint-200/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..7f3609abaf547c388bb3dd019a0e2f22052688bf --- /dev/null +++ b/checkpoint-200/rng_state_4.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:75ae2084258a24eaef7f3e8595255a9b721dd4b401a1f48e2e6d6a3addce5b55 +size 15728 diff --git a/checkpoint-200/rng_state_5.pth b/checkpoint-200/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..6c647cf2c031357bb0f4b0572a9c4dacbc3f0eae --- /dev/null +++ b/checkpoint-200/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:182672a26fabf38482fb3f1a39f99cbdec8d0a1dddcf96d324d94d92cb19751f +size 15728 diff --git a/checkpoint-200/rng_state_6.pth b/checkpoint-200/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..2bad9b7a8195820e27c67e4a7ac64d1eaa071fce --- /dev/null +++ b/checkpoint-200/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b86189dc2a25f9083423b1a08d5c7ae7b44b37f0bb2516bb883d9a0c0c11a0f +size 15728 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a1bf7547b3b35501c1b6a1f28f856821fbde403 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1d829f52b7eda5b86ef30cb486ed3addca825e7c282692a9bc15271fe74236b +size 1064 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-200/tokenizer.json b/checkpoint-200/tokenizer.json new file mode 100644 index 
0000000000000000000000000000000000000000..92cc72bfcc2faff4ba96750b21c7d2e3cb92d25c --- /dev/null +++ b/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ff5472d095ccd9332d9e723153d7bc7226cb6be9c1bffda738b5ba2e71bf26 +size 17210084 diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ae1bc63bd6e5ca8a863628311061c143679ff93 --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true 
+ }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": 
"<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": 
"<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": 
"<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set 
tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + 
"extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c6ee11b7ce14f95cc2f407a70ad9dfd0fe1ab507 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,2134 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.1694915254237288, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio": 0.0, + "completion_length": 396.3571472167969, + "epoch": 0.000847457627118644, + "grad_norm": 0.028597827622128653, + "learning_rate": 1.6949152542372883e-07, + "loss": 0.0096, + "num_tokens": 29860.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 1 + }, + { + "clip_ratio": 0.0, + "epoch": 0.001694915254237288, + "grad_norm": 0.0283861264343528, + "learning_rate": 3.3898305084745766e-07, + "loss": 0.0096, + "step": 2 + }, + { + "clip_ratio": 0.0005210353410802782, + "epoch": 0.002542372881355932, + "grad_norm": 0.024416377206652233, + "learning_rate": 5.084745762711865e-07, + "loss": 0.0095, + "step": 3 + }, + { + "clip_ratio": 0.0003804714942816645, + "epoch": 0.003389830508474576, + "grad_norm": 0.024954590093213137, + "learning_rate": 6.779661016949153e-07, + "loss": 0.0096, + "step": 4 + }, + { + "clip_ratio": 0.00028131139697507024, + "completion_length": 
477.6250305175781, + "epoch": 0.00423728813559322, + "grad_norm": 0.0, + "learning_rate": 8.474576271186441e-07, + "loss": 0.0, + "num_tokens": 64207.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 5 + }, + { + "clip_ratio": 0.00026464727125130594, + "epoch": 0.005084745762711864, + "grad_norm": 0.0, + "learning_rate": 1.016949152542373e-06, + "loss": 0.0, + "step": 6 + }, + { + "clip_ratio": 0.0003427764168009162, + "epoch": 0.005932203389830509, + "grad_norm": 0.0, + "learning_rate": 1.186440677966102e-06, + "loss": 0.0, + "step": 7 + }, + { + "clip_ratio": 0.0003427252813708037, + "epoch": 0.006779661016949152, + "grad_norm": 0.0, + "learning_rate": 1.3559322033898307e-06, + "loss": 0.0, + "step": 8 + }, + { + "clip_ratio": 0.0003535364812705666, + "completion_length": 503.14288330078125, + "epoch": 0.007627118644067797, + "grad_norm": 0.0, + "learning_rate": 1.5254237288135596e-06, + "loss": 0.0, + "num_tokens": 99207.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 9 + }, + { + "clip_ratio": 0.00017467686848249286, + "epoch": 0.00847457627118644, + "grad_norm": 0.0, + "learning_rate": 1.6949152542372882e-06, + "loss": 0.0, + "step": 10 + }, + { + "clip_ratio": 0.0002140275464626029, + "epoch": 0.009322033898305085, + "grad_norm": 0.0, + "learning_rate": 1.8644067796610171e-06, + "loss": 0.0, + "step": 11 + }, + { + "clip_ratio": 0.00035844597732648253, + "epoch": 0.010169491525423728, + 
"grad_norm": 0.0, + "learning_rate": 2.033898305084746e-06, + "loss": 0.0, + "step": 12 + }, + { + "clip_ratio": 0.00035540881799533963, + "completion_length": 471.83929443359375, + "epoch": 0.011016949152542373, + "grad_norm": 0.0, + "learning_rate": 2.203389830508475e-06, + "loss": 0.0, + "num_tokens": 132582.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 13 + }, + { + "clip_ratio": 0.0002507771132513881, + "epoch": 0.011864406779661017, + "grad_norm": 0.0, + "learning_rate": 2.372881355932204e-06, + "loss": 0.0, + "step": 14 + }, + { + "clip_ratio": 0.0001079499488696456, + "epoch": 0.012711864406779662, + "grad_norm": 0.0, + "learning_rate": 2.5423728813559323e-06, + "loss": 0.0, + "step": 15 + }, + { + "clip_ratio": 0.00021258163906168193, + "epoch": 0.013559322033898305, + "grad_norm": 0.0, + "learning_rate": 2.7118644067796613e-06, + "loss": 0.0, + "step": 16 + }, + { + "clip_ratio": 0.000322989042615518, + "completion_length": 387.14288330078125, + "epoch": 0.01440677966101695, + "grad_norm": 0.016452011518392446, + "learning_rate": 2.8813559322033903e-06, + "loss": 0.0658, + "num_tokens": 161406.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 17 + }, + { + "clip_ratio": 0.00034964055521413684, + "epoch": 0.015254237288135594, + "grad_norm": 0.017719451531367687, + "learning_rate": 3.0508474576271192e-06, + "loss": 0.0657, + 
"step": 18 + }, + { + "clip_ratio": 0.0004103984101675451, + "epoch": 0.016101694915254237, + "grad_norm": 0.016469439956852048, + "learning_rate": 3.2203389830508473e-06, + "loss": 0.0657, + "step": 19 + }, + { + "clip_ratio": 0.0003408819029573351, + "epoch": 0.01694915254237288, + "grad_norm": 0.017326107824003897, + "learning_rate": 3.3898305084745763e-06, + "loss": 0.0657, + "step": 20 + }, + { + "clip_ratio": 0.00046000577276572585, + "completion_length": 481.732177734375, + "epoch": 0.017796610169491526, + "grad_norm": 0.0, + "learning_rate": 3.5593220338983053e-06, + "loss": 0.0, + "num_tokens": 195711.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 21 + }, + { + "clip_ratio": 0.00042848457815125585, + "epoch": 0.01864406779661017, + "grad_norm": 0.0, + "learning_rate": 3.7288135593220342e-06, + "loss": 0.0, + "step": 22 + }, + { + "clip_ratio": 0.0004297326668165624, + "epoch": 0.019491525423728815, + "grad_norm": 0.0, + "learning_rate": 3.898305084745763e-06, + "loss": 0.0, + "step": 23 + }, + { + "clip_ratio": 0.000281251355772838, + "epoch": 0.020338983050847456, + "grad_norm": 0.0, + "learning_rate": 4.067796610169492e-06, + "loss": 0.0, + "step": 24 + }, + { + "clip_ratio": 0.00017563004803378135, + "completion_length": 442.7500305175781, + "epoch": 0.0211864406779661, + "grad_norm": 0.11157048303951664, + "learning_rate": 4.23728813559322e-06, + "loss": 0.0104, + "num_tokens": 227185.0, + "reward": -0.8214285969734192, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 
0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 25 + }, + { + "clip_ratio": 0.00010569583537289873, + "epoch": 0.022033898305084745, + "grad_norm": 0.12213723346474271, + "learning_rate": 4.40677966101695e-06, + "loss": 0.0104, + "step": 26 + }, + { + "clip_ratio": 0.0005364188691601157, + "epoch": 0.02288135593220339, + "grad_norm": 0.11319483991164629, + "learning_rate": 4.576271186440678e-06, + "loss": 0.0106, + "step": 27 + }, + { + "clip_ratio": 0.0010358322178944945, + "epoch": 0.023728813559322035, + "grad_norm": 0.10119136649790463, + "learning_rate": 4.745762711864408e-06, + "loss": 0.0101, + "step": 28 + }, + { + "clip_ratio": 0.0002854761842172593, + "completion_length": 420.51788330078125, + "epoch": 0.02457627118644068, + "grad_norm": 0.0, + "learning_rate": 4.915254237288136e-06, + "loss": 0.0, + "num_tokens": 257614.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 29 + }, + { + "clip_ratio": 0.00021371705224737525, + "epoch": 0.025423728813559324, + "grad_norm": 0.0, + "learning_rate": 5.084745762711865e-06, + "loss": 0.0, + "step": 30 + }, + { + "clip_ratio": 0.00016422003682237118, + "epoch": 0.026271186440677965, + "grad_norm": 0.0, + "learning_rate": 5.254237288135594e-06, + "loss": 0.0, + "step": 31 + }, + { + "clip_ratio": 0.000256577244726941, + "epoch": 0.02711864406779661, + "grad_norm": 0.0, + "learning_rate": 5.423728813559323e-06, + "loss": 0.0, + "step": 32 + }, + { + "clip_ratio": 0.00045646229409612715, + "completion_length": 465.1250305175781, + "epoch": 0.027966101694915254, + "grad_norm": 0.017873238036622066, + "learning_rate": 5.593220338983051e-06, + "loss": 0.0246, + "num_tokens": 
290581.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 33 + }, + { + "clip_ratio": 0.0006314113852567971, + "epoch": 0.0288135593220339, + "grad_norm": 0.01732638271233714, + "learning_rate": 5.7627118644067805e-06, + "loss": 0.0247, + "step": 34 + }, + { + "clip_ratio": 0.00045800459338352084, + "epoch": 0.029661016949152543, + "grad_norm": 0.017593288926627842, + "learning_rate": 5.932203389830509e-06, + "loss": 0.0247, + "step": 35 + }, + { + "clip_ratio": 0.0004213759966660291, + "epoch": 0.030508474576271188, + "grad_norm": 0.017758527483606314, + "learning_rate": 6.1016949152542385e-06, + "loss": 0.0247, + "step": 36 + }, + { + "clip_ratio": 0.00027920620050281286, + "completion_length": 487.982177734375, + "epoch": 0.03135593220338983, + "grad_norm": 0.017492673426871806, + "learning_rate": 6.271186440677966e-06, + "loss": 0.0287, + "num_tokens": 325036.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 37 + }, + { + "clip_ratio": 0.0003654122701846063, + "epoch": 0.03220338983050847, + "grad_norm": 0.016942624524485753, + "learning_rate": 6.440677966101695e-06, + "loss": 0.0287, + "step": 38 + }, + { + "clip_ratio": 0.0002445173158776015, + "epoch": 0.03305084745762712, + "grad_norm": 0.017357366453315624, + "learning_rate": 6.610169491525424e-06, + "loss": 0.0287, + 
"step": 39 + }, + { + "clip_ratio": 0.00027939456049352884, + "epoch": 0.03389830508474576, + "grad_norm": 0.017497160548341977, + "learning_rate": 6.779661016949153e-06, + "loss": 0.0287, + "step": 40 + }, + { + "clip_ratio": 0.00030169120873324573, + "completion_length": 337.76788330078125, + "epoch": 0.03474576271186441, + "grad_norm": 0.013386997712677729, + "learning_rate": 6.949152542372882e-06, + "loss": 0.0194, + "num_tokens": 351879.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 41 + }, + { + "clip_ratio": 0.000481679366203025, + "epoch": 0.03559322033898305, + "grad_norm": 0.013534365829241167, + "learning_rate": 7.1186440677966106e-06, + "loss": 0.0194, + "step": 42 + }, + { + "clip_ratio": 0.0006071141688153148, + "epoch": 0.036440677966101696, + "grad_norm": 0.013688658779614732, + "learning_rate": 7.288135593220339e-06, + "loss": 0.0193, + "step": 43 + }, + { + "clip_ratio": 0.0005443710251711309, + "epoch": 0.03728813559322034, + "grad_norm": 0.013415623466192152, + "learning_rate": 7.4576271186440685e-06, + "loss": 0.0194, + "step": 44 + }, + { + "clip_ratio": 0.00027171947294846177, + "completion_length": 358.6964416503906, + "epoch": 0.038135593220338986, + "grad_norm": 0.0, + "learning_rate": 7.627118644067797e-06, + "loss": 0.0, + "num_tokens": 379414.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 45 + }, + { + 
"clip_ratio": 0.00027013494400307536, + "epoch": 0.03898305084745763, + "grad_norm": 0.0, + "learning_rate": 7.796610169491526e-06, + "loss": 0.0, + "step": 46 + }, + { + "clip_ratio": 0.00023684222833253443, + "epoch": 0.03983050847457627, + "grad_norm": 0.0, + "learning_rate": 7.966101694915255e-06, + "loss": 0.0, + "step": 47 + }, + { + "clip_ratio": 0.0004315820406191051, + "epoch": 0.04067796610169491, + "grad_norm": 0.0, + "learning_rate": 8.135593220338983e-06, + "loss": 0.0, + "step": 48 + }, + { + "clip_ratio": 0.00034640118246898055, + "completion_length": 392.46429443359375, + "epoch": 0.04152542372881356, + "grad_norm": 0.05155975490631469, + "learning_rate": 8.305084745762712e-06, + "loss": -0.023, + "num_tokens": 408424.0, + "reward": -0.8571429252624512, + "reward_std": 0.24888646602630615, + "rewards/check_winston_local_func/mean": -0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 49 + }, + { + "clip_ratio": 0.00034579477505758405, + "epoch": 0.0423728813559322, + "grad_norm": 0.051568368553185584, + "learning_rate": 8.47457627118644e-06, + "loss": -0.0233, + "step": 50 + }, + { + "clip_ratio": 0.0005872369511052966, + "epoch": 0.043220338983050846, + "grad_norm": 0.054569986775825835, + "learning_rate": 8.64406779661017e-06, + "loss": -0.0235, + "step": 51 + }, + { + "clip_ratio": 0.00048618926666677, + "epoch": 0.04406779661016949, + "grad_norm": 0.05573624590215382, + "learning_rate": 8.8135593220339e-06, + "loss": -0.0236, + "step": 52 + }, + { + "clip_ratio": 0.000333156727720052, + "completion_length": 485.7500305175781, + "epoch": 0.044915254237288135, + "grad_norm": 0.0, + "learning_rate": 8.983050847457628e-06, + "loss": 0.0, + "num_tokens": 442986.0, + "reward": -1.0, + "reward_std": 0.0, + 
"rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 53 + }, + { + "clip_ratio": 0.00042045177542604506, + "epoch": 0.04576271186440678, + "grad_norm": 0.0, + "learning_rate": 9.152542372881356e-06, + "loss": 0.0, + "step": 54 + }, + { + "clip_ratio": 0.00031678256345912814, + "epoch": 0.046610169491525424, + "grad_norm": 0.0, + "learning_rate": 9.322033898305085e-06, + "loss": 0.0, + "step": 55 + }, + { + "clip_ratio": 0.00010463170474395156, + "epoch": 0.04745762711864407, + "grad_norm": 0.0, + "learning_rate": 9.491525423728815e-06, + "loss": 0.0, + "step": 56 + }, + { + "clip_ratio": 0.0007074553286656737, + "completion_length": 428.3214416503906, + "epoch": 0.048305084745762714, + "grad_norm": 0.04153528214569023, + "learning_rate": 9.661016949152544e-06, + "loss": 0.0343, + "num_tokens": 473892.0, + "reward": -0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 57 + }, + { + "clip_ratio": 0.0004013319849036634, + "epoch": 0.04915254237288136, + "grad_norm": 0.04657277213309362, + "learning_rate": 9.830508474576272e-06, + "loss": 0.0342, + "step": 58 + }, + { + "clip_ratio": 0.00044179416727274656, + "epoch": 0.05, + "grad_norm": 0.045153415468062494, + "learning_rate": 1e-05, + "loss": 0.0343, + "step": 59 + }, + { + "clip_ratio": 0.0007794442353770137, + "epoch": 0.05084745762711865, + "grad_norm": 0.035363902861678634, + "learning_rate": 1.016949152542373e-05, + "loss": 0.0339, + "step": 60 + }, 
+ { + "clip_ratio": 0.00021712151647079736, + "completion_length": 299.8035888671875, + "epoch": 0.051694915254237285, + "grad_norm": 0.07205399219848665, + "learning_rate": 1.0338983050847458e-05, + "loss": 0.0477, + "num_tokens": 497465.0, + "reward": -0.785714328289032, + "reward_std": 0.28498074412345886, + "rewards/check_winston_local_func/mean": -0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241877675056458, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 61 + }, + { + "clip_ratio": 0.0002563712769187987, + "epoch": 0.05254237288135593, + "grad_norm": 0.07155354465871978, + "learning_rate": 1.0508474576271188e-05, + "loss": 0.0475, + "step": 62 + }, + { + "clip_ratio": 0.0001442718057660386, + "epoch": 0.053389830508474574, + "grad_norm": 0.07289445064494822, + "learning_rate": 1.0677966101694917e-05, + "loss": 0.0474, + "step": 63 + }, + { + "clip_ratio": 0.001116903149522841, + "epoch": 0.05423728813559322, + "grad_norm": 0.06596181254777028, + "learning_rate": 1.0847457627118645e-05, + "loss": 0.0468, + "step": 64 + }, + { + "clip_ratio": 0.00027901786961592734, + "completion_length": 480.4464416503906, + "epoch": 0.05508474576271186, + "grad_norm": 0.0, + "learning_rate": 1.1016949152542374e-05, + "loss": 0.0, + "num_tokens": 532266.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 65 + }, + { + "clip_ratio": 0.00037270825123414397, + "epoch": 0.05593220338983051, + "grad_norm": 0.0, + "learning_rate": 1.1186440677966102e-05, + "loss": 0.0, + "step": 66 + }, + { + "clip_ratio": 0.0006563978386111557, + 
"epoch": 0.05677966101694915, + "grad_norm": 0.0, + "learning_rate": 1.1355932203389833e-05, + "loss": 0.0, + "step": 67 + }, + { + "clip_ratio": 0.0008186621707864106, + "epoch": 0.0576271186440678, + "grad_norm": 0.0, + "learning_rate": 1.1525423728813561e-05, + "loss": 0.0, + "step": 68 + }, + { + "clip_ratio": 0.0005370522267185152, + "completion_length": 420.3214416503906, + "epoch": 0.05847457627118644, + "grad_norm": 0.0, + "learning_rate": 1.169491525423729e-05, + "loss": 0.0, + "num_tokens": 563380.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 69 + }, + { + "clip_ratio": 0.0007551547605544329, + "epoch": 0.059322033898305086, + "grad_norm": 0.0, + "learning_rate": 1.1864406779661018e-05, + "loss": 0.0, + "step": 70 + }, + { + "clip_ratio": 0.0004996137577109039, + "epoch": 0.06016949152542373, + "grad_norm": 0.0, + "learning_rate": 1.2033898305084745e-05, + "loss": 0.0, + "step": 71 + }, + { + "clip_ratio": 0.0007176484214141965, + "epoch": 0.061016949152542375, + "grad_norm": 0.0, + "learning_rate": 1.2203389830508477e-05, + "loss": 0.0, + "step": 72 + }, + { + "clip_ratio": 0.0004170738684479147, + "completion_length": 383.6964416503906, + "epoch": 0.06186440677966102, + "grad_norm": 0.01481240616851262, + "learning_rate": 1.2372881355932205e-05, + "loss": 0.0412, + "num_tokens": 592003.0, + "reward": -0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, 
+ "step": 73 + }, + { + "clip_ratio": 0.0008365331450477242, + "epoch": 0.06271186440677966, + "grad_norm": 0.01522897212214854, + "learning_rate": 1.2542372881355932e-05, + "loss": 0.0411, + "step": 74 + }, + { + "clip_ratio": 0.000981268472969532, + "epoch": 0.0635593220338983, + "grad_norm": 0.014948882448171377, + "learning_rate": 1.2711864406779661e-05, + "loss": 0.0411, + "step": 75 + }, + { + "clip_ratio": 0.0006704007391817868, + "epoch": 0.06440677966101695, + "grad_norm": 0.015045917131498382, + "learning_rate": 1.288135593220339e-05, + "loss": 0.041, + "step": 76 + }, + { + "clip_ratio": 0.00022424904454965144, + "completion_length": 437.9821472167969, + "epoch": 0.06525423728813559, + "grad_norm": 0.030968041587588573, + "learning_rate": 1.305084745762712e-05, + "loss": 0.0453, + "num_tokens": 623050.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 77 + }, + { + "clip_ratio": 0.00053448136895895, + "epoch": 0.06610169491525424, + "grad_norm": 0.02976001587219013, + "learning_rate": 1.3220338983050848e-05, + "loss": 0.0453, + "step": 78 + }, + { + "clip_ratio": 0.0010130176087841392, + "epoch": 0.06694915254237288, + "grad_norm": 0.02743385432574901, + "learning_rate": 1.3389830508474577e-05, + "loss": 0.045, + "step": 79 + }, + { + "clip_ratio": 0.0011749044060707092, + "epoch": 0.06779661016949153, + "grad_norm": 0.025462048937107604, + "learning_rate": 1.3559322033898305e-05, + "loss": 0.045, + "step": 80 + }, + { + "clip_ratio": 0.001996266655623913, + "completion_length": 382.2321472167969, + "epoch": 0.06864406779661017, + "grad_norm": 0.13457631329414246, + "learning_rate": 1.3728813559322034e-05, + 
"loss": 0.0135, + "num_tokens": 651839.0, + "reward": -0.6785714626312256, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": -0.6785714030265808, + "rewards/check_winston_local_func/std": 0.7411819696426392, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 81 + }, + { + "clip_ratio": 0.003203267464414239, + "epoch": 0.06949152542372881, + "grad_norm": 0.11807541511453928, + "learning_rate": 1.3898305084745764e-05, + "loss": 0.0128, + "step": 82 + }, + { + "clip_ratio": 0.011069249361753464, + "epoch": 0.07033898305084746, + "grad_norm": 0.0768781703261771, + "learning_rate": 1.4067796610169493e-05, + "loss": 0.0118, + "step": 83 + }, + { + "clip_ratio": 0.013229678384959698, + "epoch": 0.0711864406779661, + "grad_norm": 0.07925229229917279, + "learning_rate": 1.4237288135593221e-05, + "loss": 0.011, + "step": 84 + }, + { + "clip_ratio": 0.0002107896434608847, + "completion_length": 397.1964416503906, + "epoch": 0.07203389830508475, + "grad_norm": 0.0461083173277337, + "learning_rate": 1.440677966101695e-05, + "loss": 0.0389, + "num_tokens": 681218.0, + "reward": -0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 85 + }, + { + "clip_ratio": 0.0010596371721476316, + "epoch": 0.07288135593220339, + "grad_norm": 0.04449467794694347, + "learning_rate": 1.4576271186440678e-05, + "loss": 0.0384, + "step": 86 + }, + { + "clip_ratio": 0.002870997181162238, + "epoch": 0.07372881355932204, + "grad_norm": 0.038978879976910054, + "learning_rate": 1.4745762711864408e-05, + 
"loss": 0.038, + "step": 87 + }, + { + "clip_ratio": 0.006624125875532627, + "epoch": 0.07457627118644068, + "grad_norm": 0.0364842012372814, + "learning_rate": 1.4915254237288137e-05, + "loss": 0.0377, + "step": 88 + }, + { + "clip_ratio": 0.00043057286529801786, + "completion_length": 399.64288330078125, + "epoch": 0.07542372881355933, + "grad_norm": 0.014090924578944663, + "learning_rate": 1.5084745762711865e-05, + "loss": 0.0328, + "num_tokens": 711078.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 89 + }, + { + "clip_ratio": 0.0018296982161700726, + "epoch": 0.07627118644067797, + "grad_norm": 0.014531205963070252, + "learning_rate": 1.5254237288135594e-05, + "loss": 0.0328, + "step": 90 + }, + { + "clip_ratio": 0.004530549980700016, + "epoch": 0.07711864406779662, + "grad_norm": 0.014754831265979268, + "learning_rate": 1.5423728813559326e-05, + "loss": 0.0327, + "step": 91 + }, + { + "clip_ratio": 0.008132151328027248, + "epoch": 0.07796610169491526, + "grad_norm": 0.014608619166449479, + "learning_rate": 1.5593220338983053e-05, + "loss": 0.0326, + "step": 92 + }, + { + "clip_ratio": 0.0007373582920990884, + "completion_length": 467.71429443359375, + "epoch": 0.0788135593220339, + "grad_norm": 0.041297580984419976, + "learning_rate": 1.576271186440678e-05, + "loss": 0.0616, + "num_tokens": 745862.0, + "reward": -0.8928571939468384, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 93 + }, + { + "clip_ratio": 0.001287531922571361, + "epoch": 0.07966101694915254, + "grad_norm": 0.030858648065290283, + "learning_rate": 1.593220338983051e-05, + "loss": 0.0614, + "step": 94 + }, + { + "clip_ratio": 0.0023924303241074085, + "epoch": 0.08050847457627118, + "grad_norm": 0.03463914321182917, + "learning_rate": 1.6101694915254237e-05, + "loss": 0.0613, + "step": 95 + }, + { + "clip_ratio": 0.00350037869066, + "epoch": 0.08135593220338982, + "grad_norm": 0.02665011286164521, + "learning_rate": 1.6271186440677967e-05, + "loss": 0.0611, + "step": 96 + }, + { + "clip_ratio": 0.0006918495637364686, + "completion_length": 320.75, + "epoch": 0.08220338983050847, + "grad_norm": 0.06373891470490567, + "learning_rate": 1.6440677966101697e-05, + "loss": -0.015, + "num_tokens": 771576.0, + "reward": -0.7500000596046448, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 97 + }, + { + "clip_ratio": 0.0029753418639302254, + "epoch": 0.08305084745762711, + "grad_norm": 0.05523249333421511, + "learning_rate": 1.6610169491525424e-05, + "loss": -0.0157, + "step": 98 + }, + { + "clip_ratio": 0.00716389948502183, + "epoch": 0.08389830508474576, + "grad_norm": 0.04924083222576615, + "learning_rate": 1.6779661016949154e-05, + "loss": -0.0158, + "step": 99 + }, + { + "clip_ratio": 0.011036296375095844, + "epoch": 0.0847457627118644, + "grad_norm": 0.04955323333773024, + "learning_rate": 1.694915254237288e-05, + "loss": -0.0163, + "step": 100 + }, + { + "clip_ratio": 0.00038607188616879284, + "completion_length": 507.2500305175781, + "epoch": 0.08559322033898305, + "grad_norm": 
0.0, + "learning_rate": 1.711864406779661e-05, + "loss": 0.0, + "num_tokens": 807230.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 101 + }, + { + "clip_ratio": 0.0004233713843859732, + "epoch": 0.08644067796610169, + "grad_norm": 0.0, + "learning_rate": 1.728813559322034e-05, + "loss": 0.0, + "step": 102 + }, + { + "clip_ratio": 0.0005304253427311778, + "epoch": 0.08728813559322034, + "grad_norm": 0.0, + "learning_rate": 1.745762711864407e-05, + "loss": 0.0, + "step": 103 + }, + { + "clip_ratio": 0.0008094432414509356, + "epoch": 0.08813559322033898, + "grad_norm": 0.0, + "learning_rate": 1.76271186440678e-05, + "loss": 0.0, + "step": 104 + }, + { + "clip_ratio": 0.0003136220038868487, + "completion_length": 309.4821472167969, + "epoch": 0.08898305084745763, + "grad_norm": 0.1215376293190595, + "learning_rate": 1.7796610169491526e-05, + "loss": 0.059, + "num_tokens": 830873.0, + "reward": -0.6071428656578064, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.6071428656578064, + "rewards/check_winston_local_func/std": 0.8017837405204773, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 105 + }, + { + "clip_ratio": 0.005680752452462912, + "epoch": 0.08983050847457627, + "grad_norm": 0.08882976004122672, + "learning_rate": 1.7966101694915256e-05, + "loss": 0.057, + "step": 106 + }, + { + "clip_ratio": 0.013865095563232899, + "epoch": 0.09067796610169492, + "grad_norm": 0.07178187465318808, + "learning_rate": 1.8135593220338986e-05, + "loss": 0.0551, + "step": 107 + }, + { + "clip_ratio": 
0.025337526574730873, + "epoch": 0.09152542372881356, + "grad_norm": 0.05889114052835241, + "learning_rate": 1.8305084745762713e-05, + "loss": 0.054, + "step": 108 + }, + { + "clip_ratio": 0.0004973930190317333, + "completion_length": 309.2857360839844, + "epoch": 0.0923728813559322, + "grad_norm": 0.10159993090017184, + "learning_rate": 1.8474576271186443e-05, + "loss": 0.1029, + "num_tokens": 856689.0, + "reward": -0.7500000596046448, + "reward_std": 0.4123912453651428, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 109 + }, + { + "clip_ratio": 0.005504293367266655, + "epoch": 0.09322033898305085, + "grad_norm": 0.09390182129772277, + "learning_rate": 1.864406779661017e-05, + "loss": 0.1017, + "step": 110 + }, + { + "clip_ratio": 0.022907190024852753, + "epoch": 0.0940677966101695, + "grad_norm": 0.08701453983072766, + "learning_rate": 1.88135593220339e-05, + "loss": 0.0999, + "step": 111 + }, + { + "clip_ratio": 0.04514092579483986, + "epoch": 0.09491525423728814, + "grad_norm": 0.08477253768734147, + "learning_rate": 1.898305084745763e-05, + "loss": 0.0987, + "step": 112 + }, + { + "clip_ratio": 0.0005664547788910568, + "completion_length": 434.39288330078125, + "epoch": 0.09576271186440678, + "grad_norm": 0.0, + "learning_rate": 1.9152542372881357e-05, + "loss": 0.0, + "num_tokens": 888255.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 113 + }, + { + "clip_ratio": 0.0015907255001366138, + "epoch": 
0.09661016949152543, + "grad_norm": 0.0, + "learning_rate": 1.9322033898305087e-05, + "loss": 0.0, + "step": 114 + }, + { + "clip_ratio": 0.003365863347426057, + "epoch": 0.09745762711864407, + "grad_norm": 0.0, + "learning_rate": 1.9491525423728814e-05, + "loss": 0.0, + "step": 115 + }, + { + "clip_ratio": 0.006915883626788855, + "epoch": 0.09830508474576272, + "grad_norm": 0.0, + "learning_rate": 1.9661016949152545e-05, + "loss": 0.0, + "step": 116 + }, + { + "clip_ratio": 0.0015928384382277727, + "completion_length": 311.08929443359375, + "epoch": 0.09915254237288136, + "grad_norm": 0.1669528890016949, + "learning_rate": 1.9830508474576275e-05, + "loss": 0.0592, + "num_tokens": 912948.0, + "reward": -0.785714328289032, + "reward_std": 0.28498074412345886, + "rewards/check_winston_local_func/mean": -0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241877675056458, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 117 + }, + { + "clip_ratio": 0.006070761010050774, + "epoch": 0.1, + "grad_norm": 0.15701074253607375, + "learning_rate": 2e-05, + "loss": 0.056, + "step": 118 + }, + { + "clip_ratio": 0.03282368928194046, + "epoch": 0.10084745762711865, + "grad_norm": 0.21942626154682726, + "learning_rate": 2.016949152542373e-05, + "loss": 0.0526, + "step": 119 + }, + { + "clip_ratio": 0.0628986731171608, + "epoch": 0.1016949152542373, + "grad_norm": 0.1568339023062343, + "learning_rate": 2.033898305084746e-05, + "loss": 0.0497, + "step": 120 + }, + { + "clip_ratio": 0.0003240547957830131, + "completion_length": 490.607177734375, + "epoch": 0.10254237288135593, + "grad_norm": 0.0, + "learning_rate": 2.0508474576271186e-05, + "loss": 0.0, + "num_tokens": 947318.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 121 + }, + { + "clip_ratio": 0.00037375700776465237, + "epoch": 0.10338983050847457, + "grad_norm": 0.0, + "learning_rate": 2.0677966101694916e-05, + "loss": 0.0, + "step": 122 + }, + { + "clip_ratio": 0.0011371899163350463, + "epoch": 0.10423728813559321, + "grad_norm": 0.0, + "learning_rate": 2.084745762711865e-05, + "loss": 0.0, + "step": 123 + }, + { + "clip_ratio": 0.0022452734410762787, + "epoch": 0.10508474576271186, + "grad_norm": 0.0, + "learning_rate": 2.1016949152542376e-05, + "loss": 0.0, + "step": 124 + }, + { + "clip_ratio": 0.004924725275486708, + "completion_length": 324.58929443359375, + "epoch": 0.1059322033898305, + "grad_norm": 0.3997089536055672, + "learning_rate": 2.1186440677966103e-05, + "loss": 0.04, + "num_tokens": 972527.0, + "reward": -0.8214285969734192, + "reward_std": 0.36553531885147095, + "rewards/check_winston_local_func/mean": -0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 125 + }, + { + "clip_ratio": 0.036066196858882904, + "epoch": 0.10677966101694915, + "grad_norm": 0.4003737832223874, + "learning_rate": 2.1355932203389833e-05, + "loss": 0.0371, + "step": 126 + }, + { + "clip_ratio": 0.06804865598678589, + "epoch": 0.10762711864406779, + "grad_norm": 0.3262616499772286, + "learning_rate": 2.152542372881356e-05, + "loss": 0.0328, + "step": 127 + }, + { + "clip_ratio": 0.08261267095804214, + "epoch": 0.10847457627118644, + "grad_norm": 0.19475445080797668, + "learning_rate": 2.169491525423729e-05, + "loss": 0.0284, + "step": 128 + }, + { + "clip_ratio": 0.00042747953557409346, + "completion_length": 
441.6785888671875, + "epoch": 0.10932203389830508, + "grad_norm": 0.07121815374577634, + "learning_rate": 2.1864406779661017e-05, + "loss": 0.0215, + "num_tokens": 1005157.0, + "reward": -0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 129 + }, + { + "clip_ratio": 0.0005996564286760986, + "epoch": 0.11016949152542373, + "grad_norm": 0.07374447574020743, + "learning_rate": 2.2033898305084748e-05, + "loss": 0.021, + "step": 130 + }, + { + "clip_ratio": 0.0070611475966870785, + "epoch": 0.11101694915254237, + "grad_norm": 0.0484843694410488, + "learning_rate": 2.2203389830508474e-05, + "loss": 0.02, + "step": 131 + }, + { + "clip_ratio": 0.02419929951429367, + "epoch": 0.11186440677966102, + "grad_norm": 0.03734227928764934, + "learning_rate": 2.2372881355932205e-05, + "loss": 0.0194, + "step": 132 + }, + { + "clip_ratio": 0.0008097242680378258, + "completion_length": 299.9285888671875, + "epoch": 0.11271186440677966, + "grad_norm": 0.2037296860020652, + "learning_rate": 2.2542372881355935e-05, + "loss": 0.0123, + "num_tokens": 1029577.0, + "reward": -0.4285714626312256, + "reward_std": 0.49777287244796753, + "rewards/check_winston_local_func/mean": -0.4285714328289032, + "rewards/check_winston_local_func/std": 0.9116845726966858, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 133 + }, + { + "clip_ratio": 0.010970565490424633, + "epoch": 0.1135593220338983, + "grad_norm": 0.15172018259613887, + "learning_rate": 2.2711864406779665e-05, + "loss": 0.0095, + "step": 134 + }, + { + "clip_ratio": 
0.027290966361761093, + "epoch": 0.11440677966101695, + "grad_norm": 0.14632003828933562, + "learning_rate": 2.2881355932203392e-05, + "loss": 0.0066, + "step": 135 + }, + { + "clip_ratio": 0.04884405434131622, + "epoch": 0.1152542372881356, + "grad_norm": 0.13010992493757564, + "learning_rate": 2.3050847457627122e-05, + "loss": 0.0037, + "step": 136 + }, + { + "clip_ratio": 0.00016204381245188415, + "completion_length": 397.9821472167969, + "epoch": 0.11610169491525424, + "grad_norm": 0.0819715923540025, + "learning_rate": 2.322033898305085e-05, + "loss": 0.0348, + "num_tokens": 1059368.0, + "reward": -0.7500000596046448, + "reward_std": 0.3499017357826233, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 137 + }, + { + "clip_ratio": 0.0012223825324326754, + "epoch": 0.11694915254237288, + "grad_norm": 0.07970324522981491, + "learning_rate": 2.338983050847458e-05, + "loss": 0.0336, + "step": 138 + }, + { + "clip_ratio": 0.015393489971756935, + "epoch": 0.11779661016949153, + "grad_norm": 0.07570693688371119, + "learning_rate": 2.3559322033898306e-05, + "loss": 0.0321, + "step": 139 + }, + { + "clip_ratio": 0.07253921031951904, + "epoch": 0.11864406779661017, + "grad_norm": 0.05800544884381334, + "learning_rate": 2.3728813559322036e-05, + "loss": 0.0305, + "step": 140 + }, + { + "clip_ratio": 0.00020609110652003437, + "completion_length": 376.3035888671875, + "epoch": 0.11949152542372882, + "grad_norm": 0.16488571125022886, + "learning_rate": 2.3898305084745763e-05, + "loss": -0.0156, + "num_tokens": 1088561.0, + "reward": -0.5, + "reward_std": 0.686587929725647, + "rewards/check_winston_local_func/mean": -0.5, + "rewards/check_winston_local_func/std": 0.8738628625869751, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 141 + }, + { + "clip_ratio": 0.020974619314074516, + "epoch": 0.12033898305084746, + "grad_norm": 0.12033253885509411, + "learning_rate": 2.406779661016949e-05, + "loss": -0.02, + "step": 142 + }, + { + "clip_ratio": 0.14757588505744934, + "epoch": 0.1211864406779661, + "grad_norm": 0.18906094003962706, + "learning_rate": 2.4237288135593224e-05, + "loss": -0.0215, + "step": 143 + }, + { + "clip_ratio": 0.18001240491867065, + "epoch": 0.12203389830508475, + "grad_norm": 0.2094330456679022, + "learning_rate": 2.4406779661016954e-05, + "loss": -0.0238, + "step": 144 + }, + { + "clip_ratio": 0.0010827317601069808, + "completion_length": 216.85714721679688, + "epoch": 0.1228813559322034, + "grad_norm": 0.22593574409537565, + "learning_rate": 2.457627118644068e-05, + "loss": -0.057, + "num_tokens": 1107713.0, + "reward": -0.0357142873108387, + "reward_std": 0.808063805103302, + "rewards/check_winston_local_func/mean": -0.0357142873108387, + "rewards/check_winston_local_func/std": 1.0084062814712524, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 145 + }, + { + "clip_ratio": 0.01685175858438015, + "epoch": 0.12372881355932204, + "grad_norm": 0.21920453847219976, + "learning_rate": 2.474576271186441e-05, + "loss": -0.0622, + "step": 146 + }, + { + "clip_ratio": 0.05698274075984955, + "epoch": 0.12457627118644068, + "grad_norm": 0.23790061749019706, + "learning_rate": 2.4915254237288138e-05, + "loss": -0.0672, + "step": 147 + }, + { + "clip_ratio": 0.06983836740255356, + "epoch": 0.12542372881355932, + "grad_norm": 0.19359662720887325, + "learning_rate": 2.5084745762711865e-05, + "loss": -0.0724, + "step": 148 + }, + { + 
"clip_ratio": 0.0013232758501544595, + "completion_length": 251.96429443359375, + "epoch": 0.12627118644067797, + "grad_norm": 0.27961740628458276, + "learning_rate": 2.5254237288135595e-05, + "loss": 0.06, + "num_tokens": 1129487.0, + "reward": -0.0357142873108387, + "reward_std": 0.9462584257125854, + "rewards/check_winston_local_func/mean": -0.0357142873108387, + "rewards/check_winston_local_func/std": 1.0084062814712524, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 149 + }, + { + "clip_ratio": 0.03364234417676926, + "epoch": 0.1271186440677966, + "grad_norm": 0.19276991014072303, + "learning_rate": 2.5423728813559322e-05, + "loss": 0.054, + "step": 150 + }, + { + "clip_ratio": 0.1430949568748474, + "epoch": 0.12796610169491526, + "grad_norm": 0.2768368269508983, + "learning_rate": 2.5593220338983052e-05, + "loss": 0.0518, + "step": 151 + }, + { + "clip_ratio": 0.16415317356586456, + "epoch": 0.1288135593220339, + "grad_norm": 0.25743304440606246, + "learning_rate": 2.576271186440678e-05, + "loss": 0.0475, + "step": 152 + }, + { + "clip_ratio": 0.0013469145633280277, + "completion_length": 204.48214721679688, + "epoch": 0.12966101694915255, + "grad_norm": 0.28188012404317475, + "learning_rate": 2.5932203389830512e-05, + "loss": 0.0527, + "num_tokens": 1148354.0, + "reward": 0.1428571492433548, + "reward_std": 0.7129831910133362, + "rewards/check_winston_local_func/mean": 0.1428571492433548, + "rewards/check_winston_local_func/std": 0.9987004995346069, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 153 + }, + { + "clip_ratio": 0.016695290803909302, + "epoch": 0.13050847457627118, + "grad_norm": 0.2641379759457116, + "learning_rate": 2.610169491525424e-05, + 
"loss": 0.0473, + "step": 154 + }, + { + "clip_ratio": 0.05237039551138878, + "epoch": 0.13135593220338984, + "grad_norm": 0.20691108630731772, + "learning_rate": 2.627118644067797e-05, + "loss": 0.0414, + "step": 155 + }, + { + "clip_ratio": 0.0867982804775238, + "epoch": 0.13220338983050847, + "grad_norm": 0.15341544674011254, + "learning_rate": 2.6440677966101696e-05, + "loss": 0.0351, + "step": 156 + }, + { + "clip_ratio": 0.0006545564392581582, + "completion_length": 233.9285888671875, + "epoch": 0.13305084745762713, + "grad_norm": 0.16036976523795443, + "learning_rate": 2.6610169491525427e-05, + "loss": 0.0179, + "num_tokens": 1168622.0, + "reward": 0.7142857313156128, + "reward_std": 0.4016071856021881, + "rewards/check_winston_local_func/mean": 0.7142857313156128, + "rewards/check_winston_local_func/std": 0.7061878442764282, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 157 + }, + { + "clip_ratio": 0.00993060227483511, + "epoch": 0.13389830508474576, + "grad_norm": 0.1298083776077636, + "learning_rate": 2.6779661016949153e-05, + "loss": 0.0151, + "step": 158 + }, + { + "clip_ratio": 0.0733163133263588, + "epoch": 0.13474576271186442, + "grad_norm": 0.11590218855503849, + "learning_rate": 2.6949152542372884e-05, + "loss": 0.0125, + "step": 159 + }, + { + "clip_ratio": 0.14935636520385742, + "epoch": 0.13559322033898305, + "grad_norm": 0.16154268567658825, + "learning_rate": 2.711864406779661e-05, + "loss": 0.011, + "step": 160 + }, + { + "clip_ratio": 0.0009650280699133873, + "completion_length": 174.7678680419922, + "epoch": 0.13644067796610168, + "grad_norm": 0.15404950919743313, + "learning_rate": 2.728813559322034e-05, + "loss": 0.0078, + "num_tokens": 1185697.0, + "reward": 0.8571429252624512, + "reward_std": 0.3342905640602112, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + 
"rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 161 + }, + { + "clip_ratio": 0.004791476763784885, + "epoch": 0.13728813559322034, + "grad_norm": 0.12682344230599282, + "learning_rate": 2.7457627118644068e-05, + "loss": 0.0056, + "step": 162 + }, + { + "clip_ratio": 0.023417560383677483, + "epoch": 0.13813559322033897, + "grad_norm": 0.0948693079603576, + "learning_rate": 2.76271186440678e-05, + "loss": 0.003, + "step": 163 + }, + { + "clip_ratio": 0.07911951839923859, + "epoch": 0.13898305084745763, + "grad_norm": 0.09089932231497586, + "learning_rate": 2.7796610169491528e-05, + "loss": 0.0007, + "step": 164 + }, + { + "clip_ratio": 0.000979878008365631, + "completion_length": 126.64286041259766, + "epoch": 0.13983050847457626, + "grad_norm": 0.1801163708005843, + "learning_rate": 2.7966101694915258e-05, + "loss": -0.0396, + "num_tokens": 1199565.0, + "reward": 0.7500000596046448, + "reward_std": 0.3859959840774536, + "rewards/check_winston_local_func/mean": 0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 165 + }, + { + "clip_ratio": 0.009913308545947075, + "epoch": 0.14067796610169492, + "grad_norm": 0.14588220837158195, + "learning_rate": 2.8135593220338985e-05, + "loss": -0.0428, + "step": 166 + }, + { + "clip_ratio": 0.07110879570245743, + "epoch": 0.14152542372881355, + "grad_norm": 0.276973278154756, + "learning_rate": 2.8305084745762715e-05, + "loss": -0.0441, + "step": 167 + }, + { + "clip_ratio": 0.06909574568271637, + "epoch": 0.1423728813559322, + "grad_norm": 0.12488402451050255, + "learning_rate": 2.8474576271186442e-05, + 
"loss": -0.0494, + "step": 168 + }, + { + "clip_ratio": 0.0003819709818344563, + "completion_length": 152.73214721679688, + "epoch": 0.14322033898305084, + "grad_norm": 0.3195642927880649, + "learning_rate": 2.8644067796610172e-05, + "loss": 0.0302, + "num_tokens": 1214790.0, + "reward": 0.7142857313156128, + "reward_std": 0.3342905342578888, + "rewards/check_winston_local_func/mean": 0.7142857313156128, + "rewards/check_winston_local_func/std": 0.7061878442764282, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 169 + }, + { + "clip_ratio": 0.015475978143513203, + "epoch": 0.1440677966101695, + "grad_norm": 0.2360393211362849, + "learning_rate": 2.88135593220339e-05, + "loss": 0.0228, + "step": 170 + }, + { + "clip_ratio": 0.08493895828723907, + "epoch": 0.14491525423728813, + "grad_norm": 0.17350104363138513, + "learning_rate": 2.8983050847457626e-05, + "loss": 0.0163, + "step": 171 + }, + { + "clip_ratio": 0.14768318831920624, + "epoch": 0.14576271186440679, + "grad_norm": 0.19569281232532856, + "learning_rate": 2.9152542372881356e-05, + "loss": 0.013, + "step": 172 + }, + { + "clip_ratio": 0.006150017958134413, + "completion_length": 186.25001525878906, + "epoch": 0.14661016949152542, + "grad_norm": 0.06068449124289285, + "learning_rate": 2.932203389830509e-05, + "loss": -0.0169, + "num_tokens": 1232564.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 173 + }, + { + "clip_ratio": 0.014521388337016106, + "epoch": 0.14745762711864407, + "grad_norm": 0.05994459441740582, + 
"learning_rate": 2.9491525423728817e-05, + "loss": -0.0174, + "step": 174 + }, + { + "clip_ratio": 0.04354570060968399, + "epoch": 0.1483050847457627, + "grad_norm": 0.06278027199945566, + "learning_rate": 2.9661016949152547e-05, + "loss": -0.0183, + "step": 175 + }, + { + "clip_ratio": 0.10504651814699173, + "epoch": 0.14915254237288136, + "grad_norm": 0.04416483226500781, + "learning_rate": 2.9830508474576274e-05, + "loss": -0.0193, + "step": 176 + }, + { + "clip_ratio": 0.003162125591188669, + "completion_length": 163.17857360839844, + "epoch": 0.15, + "grad_norm": 0.11359153510598317, + "learning_rate": 3.0000000000000004e-05, + "loss": -0.0335, + "num_tokens": 1248550.0, + "reward": 0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 177 + }, + { + "clip_ratio": 0.01111722644418478, + "epoch": 0.15084745762711865, + "grad_norm": 0.10416258904679447, + "learning_rate": 3.016949152542373e-05, + "loss": -0.0347, + "step": 178 + }, + { + "clip_ratio": 0.04117439687252045, + "epoch": 0.15169491525423728, + "grad_norm": 0.08204255975558637, + "learning_rate": 3.0338983050847458e-05, + "loss": -0.0364, + "step": 179 + }, + { + "clip_ratio": 0.08657827973365784, + "epoch": 0.15254237288135594, + "grad_norm": 0.08178448057500348, + "learning_rate": 3.0508474576271188e-05, + "loss": -0.038, + "step": 180 + }, + { + "clip_ratio": 0.010199248790740967, + "completion_length": 162.35714721679688, + "epoch": 0.15338983050847457, + "grad_norm": 0.4819050859019718, + "learning_rate": 3.067796610169492e-05, + "loss": 0.06, + "num_tokens": 1264994.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + 
"rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 181 + }, + { + "clip_ratio": 0.060436759144067764, + "epoch": 0.15423728813559323, + "grad_norm": 0.1959898799735129, + "learning_rate": 3.084745762711865e-05, + "loss": 0.0533, + "step": 182 + }, + { + "clip_ratio": 0.13463598489761353, + "epoch": 0.15508474576271186, + "grad_norm": 0.12678282333898375, + "learning_rate": 3.101694915254238e-05, + "loss": 0.0482, + "step": 183 + }, + { + "clip_ratio": 0.19176946580410004, + "epoch": 0.15593220338983052, + "grad_norm": 0.10756609820315277, + "learning_rate": 3.1186440677966106e-05, + "loss": 0.0463, + "step": 184 + }, + { + "clip_ratio": 0.0008241009199991822, + "completion_length": 237.60714721679688, + "epoch": 0.15677966101694915, + "grad_norm": 0.0, + "learning_rate": 3.135593220338983e-05, + "loss": 0.0, + "num_tokens": 1286164.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 185 + }, + { + "clip_ratio": 0.002994579030200839, + "epoch": 0.1576271186440678, + "grad_norm": 0.0, + "learning_rate": 3.152542372881356e-05, + "loss": 0.0, + "step": 186 + }, + { + "clip_ratio": 0.00574068445712328, + "epoch": 0.15847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.169491525423729e-05, + "loss": 0.0, + "step": 187 + }, + { + "clip_ratio": 0.012791804037988186, + "epoch": 0.15932203389830507, + "grad_norm": 0.0, + "learning_rate": 3.186440677966102e-05, + "loss": 0.0, + "step": 188 + }, + { + "clip_ratio": 
0.006764067802578211, + "completion_length": 143.94644165039062, + "epoch": 0.16016949152542373, + "grad_norm": 0.04704135237627796, + "learning_rate": 3.203389830508475e-05, + "loss": -0.0095, + "num_tokens": 1301409.0, + "reward": 0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 189 + }, + { + "clip_ratio": 0.013139193877577782, + "epoch": 0.16101694915254236, + "grad_norm": 0.04532372769932697, + "learning_rate": 3.2203389830508473e-05, + "loss": -0.0098, + "step": 190 + }, + { + "clip_ratio": 0.03423069044947624, + "epoch": 0.16186440677966102, + "grad_norm": 0.040646403971755785, + "learning_rate": 3.237288135593221e-05, + "loss": -0.0105, + "step": 191 + }, + { + "clip_ratio": 0.06455554068088531, + "epoch": 0.16271186440677965, + "grad_norm": 0.03643001220928061, + "learning_rate": 3.2542372881355934e-05, + "loss": -0.0113, + "step": 192 + }, + { + "clip_ratio": 0.0007823093910701573, + "completion_length": 229.9107208251953, + "epoch": 0.1635593220338983, + "grad_norm": 0.0, + "learning_rate": 3.271186440677967e-05, + "loss": 0.0, + "num_tokens": 1321708.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 193 + }, + { + "clip_ratio": 0.0008988279732875526, + "epoch": 0.16440677966101694, + "grad_norm": 0.0, + "learning_rate": 3.2881355932203394e-05, + "loss": 0.0, + "step": 194 + }, + { + "clip_ratio": 0.003465626621618867, + "epoch": 
0.1652542372881356, + "grad_norm": 0.0, + "learning_rate": 3.305084745762712e-05, + "loss": 0.0, + "step": 195 + }, + { + "clip_ratio": 0.008655412122607231, + "epoch": 0.16610169491525423, + "grad_norm": 0.0, + "learning_rate": 3.322033898305085e-05, + "loss": 0.0, + "step": 196 + }, + { + "clip_ratio": 0.0021059864666312933, + "completion_length": 166.85714721679688, + "epoch": 0.1669491525423729, + "grad_norm": 0.17307734331449404, + "learning_rate": 3.338983050847458e-05, + "loss": -0.0036, + "num_tokens": 1338540.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 197 + }, + { + "clip_ratio": 0.009274979121983051, + "epoch": 0.16779661016949152, + "grad_norm": 0.10860266060182006, + "learning_rate": 3.355932203389831e-05, + "loss": -0.0066, + "step": 198 + }, + { + "clip_ratio": 0.03715561330318451, + "epoch": 0.16864406779661018, + "grad_norm": 0.09136703784102146, + "learning_rate": 3.3728813559322035e-05, + "loss": -0.008, + "step": 199 + }, + { + "clip_ratio": 0.06759678572416306, + "epoch": 0.1694915254237288, + "grad_norm": 0.08121070179066665, + "learning_rate": 3.389830508474576e-05, + "loss": -0.009, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 2360, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin 
b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1261db78e0a310bb2e0bd6333e2741bd2c4391ea --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3bd8ba987ac3c91f8253f49fc4f0e162f8c1db67922f9a6a6a7ad4757383ff +size 7544 diff --git a/checkpoint-200/zero_to_fp32.py b/checkpoint-200/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/checkpoint-200/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a0f59b7b91d61179514d6e990a01e588f1af99b0 --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.0 \ No newline at end of file diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..29e0b3a4b795e316b1a7ba9b7dc790302a9d6e0f --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "gate_proj", + "o_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": 
null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65ff29c6a65fe19b91669b0a97e90caf9eab76fc --- /dev/null +++ b/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ede9b66aea55096f75a2489457a0226c38414ae39f00c51f70deec0acd0d934 +size 167832688 diff --git a/checkpoint-300/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-300/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c267a1bf6ccdae39650c0c521a2764fb515ff81a --- /dev/null +++ b/checkpoint-300/global_step300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b887d4f39a2d86661936b06b9c62ec645f6d4d8b28ec2bfea640009a7947953 +size 72284496 diff --git a/checkpoint-300/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/checkpoint-300/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba1ad9dd069e49d3820e722d06fdd2f53f2ab6ab --- /dev/null +++ b/checkpoint-300/global_step300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3822362b99fcd75439ca509d8d5e803a3edcbe3c13cafa78114566989787562 +size 72284496 diff --git a/checkpoint-300/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/checkpoint-300/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..89114ddf4fed1094b4d1379ced97aede5ff29cec --- /dev/null +++ b/checkpoint-300/global_step300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ccf950418e6781df1f945fcfbfafbff4585c2655946f156b202f6bfdb552f537 +size 72284496 diff --git a/checkpoint-300/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/checkpoint-300/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..44163d0a407252eb09bd845cfd62493641c40b98 --- /dev/null +++ b/checkpoint-300/global_step300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f7db1b067e433c5e39a76c43c47816fc944423a84d5f348ff5f46223c3ea04a +size 72284496 diff --git a/checkpoint-300/global_step300/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/checkpoint-300/global_step300/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7918180f3dea22fde174083d663eb27510d89259 --- /dev/null +++ b/checkpoint-300/global_step300/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8403d0d62a1a8248720ec68af454334c0882bb7bdbe0b24a7eb2f82503491764 +size 72284496 diff --git a/checkpoint-300/global_step300/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/checkpoint-300/global_step300/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bba176fe241485076739d93d786f8d4dad24086 --- /dev/null +++ b/checkpoint-300/global_step300/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7d59dee646a4402c516f8a052a4e85d4f5b9a3c25b4e04084c644bfc7cf552a +size 72284496 diff --git a/checkpoint-300/global_step300/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/checkpoint-300/global_step300/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..71c9d9e0971d8455044258e8cd56e3e87bd6ec9e --- /dev/null +++ 
b/checkpoint-300/global_step300/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:525e0f796e1e6ef5103d2ab5778bf4a92e0511e78f2677de836a833e1c4c658c +size 72284496 diff --git a/checkpoint-300/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-300/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f91c2e38636037521d0e560a049757e7d5b6bd4 --- /dev/null +++ b/checkpoint-300/global_step300/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c506184a6d56f4c2025bbfa85f2c9cd5b4fe5cd0ccaf36dd619cd7d57b90662a +size 443182 diff --git a/checkpoint-300/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt b/checkpoint-300/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..87b53ad82405c0c1d093a8a74b3d8f106fc106aa --- /dev/null +++ b/checkpoint-300/global_step300/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241023438e3414fc93c24c71292437ac16cd0235da1161781f4cad2c76c15183 +size 443182 diff --git a/checkpoint-300/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt b/checkpoint-300/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08ba76db48db04f733d61e3c91c8e076f27d47af --- /dev/null +++ b/checkpoint-300/global_step300/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76616c42ee2cfb14ee1440abc7c759f117765306ecda943a11d98a6b1b895733 +size 443182 diff --git a/checkpoint-300/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt b/checkpoint-300/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..db0a6273d44e4b7017237f8da43503aba45dc8f8 --- /dev/null +++ b/checkpoint-300/global_step300/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aeb6e918bdb13349b2b92099809adda6e3bd5806e4ea5dbed4dfbac9ad5771f +size 443182 diff --git a/checkpoint-300/global_step300/zero_pp_rank_4_mp_rank_00_model_states.pt b/checkpoint-300/global_step300/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a82899017a1c65993ef024346ad8f12e2aaf1c0 --- /dev/null +++ b/checkpoint-300/global_step300/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476e8d57aade7fe1f2b167cc6794f8d5151ea7842cf99ea6ed22a62e2ddc2cc8 +size 443182 diff --git a/checkpoint-300/global_step300/zero_pp_rank_5_mp_rank_00_model_states.pt b/checkpoint-300/global_step300/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f68ec419f53453c845f341ff5a1edddec2212651 --- /dev/null +++ b/checkpoint-300/global_step300/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3805a385c71f9cd37279a60f09f3260d24d1733299821ab3c6c2f2356e8129eb +size 443182 diff --git a/checkpoint-300/global_step300/zero_pp_rank_6_mp_rank_00_model_states.pt b/checkpoint-300/global_step300/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..839d16310999f6e316d4f319ba2bd826f3e21fc4 --- /dev/null +++ b/checkpoint-300/global_step300/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae36a5579c321a7e5d8e1b1c3b615b546c25f2dad25e0be61dc93eb61e8d642d +size 443182 diff --git a/checkpoint-300/latest b/checkpoint-300/latest new file mode 100644 index 
0000000000000000000000000000000000000000..6761b575fffac7f1984044dcb6446b3a51da04c8 --- /dev/null +++ b/checkpoint-300/latest @@ -0,0 +1 @@ +global_step300 \ No newline at end of file diff --git a/checkpoint-300/rng_state_0.pth b/checkpoint-300/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f3048ec037f747597f690613b58064e154701e5 --- /dev/null +++ b/checkpoint-300/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3d2102945ef5cf0caea0afc3f2900e36fdfc5f38a497467a57f8d9a6f79990 +size 15728 diff --git a/checkpoint-300/rng_state_1.pth b/checkpoint-300/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..b2db07040adae3e92b517cca79621c84dcb407c4 --- /dev/null +++ b/checkpoint-300/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c51b8a6a6bc4feefcee16a25930e76888909a7fc845973afc0366b13972f8f2a +size 15728 diff --git a/checkpoint-300/rng_state_2.pth b/checkpoint-300/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..7aedeea7dd21c6b1983d3c90e0a79d13d93ce11b --- /dev/null +++ b/checkpoint-300/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07484478d89f48be27e9216373ac1fdd5d73a0d732c7c89b02cb8c01c3210830 +size 15728 diff --git a/checkpoint-300/rng_state_3.pth b/checkpoint-300/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..d0c129a08738b5007a054a816952eddedd7d4dc0 --- /dev/null +++ b/checkpoint-300/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54fed313036abafffdfd7e2748cd85e0fa730b6ee54ceb9c4d26c0d766a31e72 +size 15792 diff --git a/checkpoint-300/rng_state_4.pth b/checkpoint-300/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..9258373648baeab64ec2ea128365aa3d6e1419bf --- /dev/null +++ b/checkpoint-300/rng_state_4.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:52669fa30c31ee3d4e3aba2bb621800da53c8726b2c9b5974ebddfbded3650d5 +size 15728 diff --git a/checkpoint-300/rng_state_5.pth b/checkpoint-300/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..921960267aa2b78d4158305c123d04cae3842ad7 --- /dev/null +++ b/checkpoint-300/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d822407ec4fa6d3deff9b593dd0c94074d690460d2c8899730b6c9ef1b57c6 +size 15728 diff --git a/checkpoint-300/rng_state_6.pth b/checkpoint-300/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..eeeec8e7595f2bdd7990c029e8b047418983caa7 --- /dev/null +++ b/checkpoint-300/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:239e6c6745c83c221fbb21b8a936e43e0fbe88140b9b09eaf62274bb47dd2f0a +size 15728 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..16b409501b074d0f68c8a04ac632b728f190e0fa --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c9fbd58188842f5c4a03fbdaf6779d4fd1e85aa422c82b9fe3e4cda5ffef2b8 +size 1064 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-300/tokenizer.json b/checkpoint-300/tokenizer.json new file mode 100644 index 
0000000000000000000000000000000000000000..92cc72bfcc2faff4ba96750b21c7d2e3cb92d25c --- /dev/null +++ b/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ff5472d095ccd9332d9e723153d7bc7226cb6be9c1bffda738b5ba2e71bf26 +size 17210084 diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ae1bc63bd6e5ca8a863628311061c143679ff93 --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true 
+ }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": 
"<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": 
"<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": 
"<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set 
tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + 
"extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b9f5c9e51c23da2056fe7f8e05204258d516bebc --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,3184 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2542372881355932, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio": 0.0, + "completion_length": 396.3571472167969, + "epoch": 0.000847457627118644, + "grad_norm": 0.028597827622128653, + "learning_rate": 1.6949152542372883e-07, + "loss": 0.0096, + "num_tokens": 29860.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 1 + }, + { + "clip_ratio": 0.0, + "epoch": 0.001694915254237288, + "grad_norm": 0.0283861264343528, + "learning_rate": 3.3898305084745766e-07, + "loss": 0.0096, + "step": 2 + }, + { + "clip_ratio": 0.0005210353410802782, + "epoch": 0.002542372881355932, + "grad_norm": 0.024416377206652233, + "learning_rate": 5.084745762711865e-07, + "loss": 0.0095, + "step": 3 + }, + { + "clip_ratio": 0.0003804714942816645, + "epoch": 0.003389830508474576, + "grad_norm": 0.024954590093213137, + "learning_rate": 6.779661016949153e-07, + "loss": 0.0096, + "step": 4 + }, + { + "clip_ratio": 0.00028131139697507024, + "completion_length": 
477.6250305175781, + "epoch": 0.00423728813559322, + "grad_norm": 0.0, + "learning_rate": 8.474576271186441e-07, + "loss": 0.0, + "num_tokens": 64207.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 5 + }, + { + "clip_ratio": 0.00026464727125130594, + "epoch": 0.005084745762711864, + "grad_norm": 0.0, + "learning_rate": 1.016949152542373e-06, + "loss": 0.0, + "step": 6 + }, + { + "clip_ratio": 0.0003427764168009162, + "epoch": 0.005932203389830509, + "grad_norm": 0.0, + "learning_rate": 1.186440677966102e-06, + "loss": 0.0, + "step": 7 + }, + { + "clip_ratio": 0.0003427252813708037, + "epoch": 0.006779661016949152, + "grad_norm": 0.0, + "learning_rate": 1.3559322033898307e-06, + "loss": 0.0, + "step": 8 + }, + { + "clip_ratio": 0.0003535364812705666, + "completion_length": 503.14288330078125, + "epoch": 0.007627118644067797, + "grad_norm": 0.0, + "learning_rate": 1.5254237288135596e-06, + "loss": 0.0, + "num_tokens": 99207.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 9 + }, + { + "clip_ratio": 0.00017467686848249286, + "epoch": 0.00847457627118644, + "grad_norm": 0.0, + "learning_rate": 1.6949152542372882e-06, + "loss": 0.0, + "step": 10 + }, + { + "clip_ratio": 0.0002140275464626029, + "epoch": 0.009322033898305085, + "grad_norm": 0.0, + "learning_rate": 1.8644067796610171e-06, + "loss": 0.0, + "step": 11 + }, + { + "clip_ratio": 0.00035844597732648253, + "epoch": 0.010169491525423728, + 
"grad_norm": 0.0, + "learning_rate": 2.033898305084746e-06, + "loss": 0.0, + "step": 12 + }, + { + "clip_ratio": 0.00035540881799533963, + "completion_length": 471.83929443359375, + "epoch": 0.011016949152542373, + "grad_norm": 0.0, + "learning_rate": 2.203389830508475e-06, + "loss": 0.0, + "num_tokens": 132582.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 13 + }, + { + "clip_ratio": 0.0002507771132513881, + "epoch": 0.011864406779661017, + "grad_norm": 0.0, + "learning_rate": 2.372881355932204e-06, + "loss": 0.0, + "step": 14 + }, + { + "clip_ratio": 0.0001079499488696456, + "epoch": 0.012711864406779662, + "grad_norm": 0.0, + "learning_rate": 2.5423728813559323e-06, + "loss": 0.0, + "step": 15 + }, + { + "clip_ratio": 0.00021258163906168193, + "epoch": 0.013559322033898305, + "grad_norm": 0.0, + "learning_rate": 2.7118644067796613e-06, + "loss": 0.0, + "step": 16 + }, + { + "clip_ratio": 0.000322989042615518, + "completion_length": 387.14288330078125, + "epoch": 0.01440677966101695, + "grad_norm": 0.016452011518392446, + "learning_rate": 2.8813559322033903e-06, + "loss": 0.0658, + "num_tokens": 161406.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 17 + }, + { + "clip_ratio": 0.00034964055521413684, + "epoch": 0.015254237288135594, + "grad_norm": 0.017719451531367687, + "learning_rate": 3.0508474576271192e-06, + "loss": 0.0657, + 
"step": 18 + }, + { + "clip_ratio": 0.0004103984101675451, + "epoch": 0.016101694915254237, + "grad_norm": 0.016469439956852048, + "learning_rate": 3.2203389830508473e-06, + "loss": 0.0657, + "step": 19 + }, + { + "clip_ratio": 0.0003408819029573351, + "epoch": 0.01694915254237288, + "grad_norm": 0.017326107824003897, + "learning_rate": 3.3898305084745763e-06, + "loss": 0.0657, + "step": 20 + }, + { + "clip_ratio": 0.00046000577276572585, + "completion_length": 481.732177734375, + "epoch": 0.017796610169491526, + "grad_norm": 0.0, + "learning_rate": 3.5593220338983053e-06, + "loss": 0.0, + "num_tokens": 195711.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 21 + }, + { + "clip_ratio": 0.00042848457815125585, + "epoch": 0.01864406779661017, + "grad_norm": 0.0, + "learning_rate": 3.7288135593220342e-06, + "loss": 0.0, + "step": 22 + }, + { + "clip_ratio": 0.0004297326668165624, + "epoch": 0.019491525423728815, + "grad_norm": 0.0, + "learning_rate": 3.898305084745763e-06, + "loss": 0.0, + "step": 23 + }, + { + "clip_ratio": 0.000281251355772838, + "epoch": 0.020338983050847456, + "grad_norm": 0.0, + "learning_rate": 4.067796610169492e-06, + "loss": 0.0, + "step": 24 + }, + { + "clip_ratio": 0.00017563004803378135, + "completion_length": 442.7500305175781, + "epoch": 0.0211864406779661, + "grad_norm": 0.11157048303951664, + "learning_rate": 4.23728813559322e-06, + "loss": 0.0104, + "num_tokens": 227185.0, + "reward": -0.8214285969734192, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 
0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 25 + }, + { + "clip_ratio": 0.00010569583537289873, + "epoch": 0.022033898305084745, + "grad_norm": 0.12213723346474271, + "learning_rate": 4.40677966101695e-06, + "loss": 0.0104, + "step": 26 + }, + { + "clip_ratio": 0.0005364188691601157, + "epoch": 0.02288135593220339, + "grad_norm": 0.11319483991164629, + "learning_rate": 4.576271186440678e-06, + "loss": 0.0106, + "step": 27 + }, + { + "clip_ratio": 0.0010358322178944945, + "epoch": 0.023728813559322035, + "grad_norm": 0.10119136649790463, + "learning_rate": 4.745762711864408e-06, + "loss": 0.0101, + "step": 28 + }, + { + "clip_ratio": 0.0002854761842172593, + "completion_length": 420.51788330078125, + "epoch": 0.02457627118644068, + "grad_norm": 0.0, + "learning_rate": 4.915254237288136e-06, + "loss": 0.0, + "num_tokens": 257614.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 29 + }, + { + "clip_ratio": 0.00021371705224737525, + "epoch": 0.025423728813559324, + "grad_norm": 0.0, + "learning_rate": 5.084745762711865e-06, + "loss": 0.0, + "step": 30 + }, + { + "clip_ratio": 0.00016422003682237118, + "epoch": 0.026271186440677965, + "grad_norm": 0.0, + "learning_rate": 5.254237288135594e-06, + "loss": 0.0, + "step": 31 + }, + { + "clip_ratio": 0.000256577244726941, + "epoch": 0.02711864406779661, + "grad_norm": 0.0, + "learning_rate": 5.423728813559323e-06, + "loss": 0.0, + "step": 32 + }, + { + "clip_ratio": 0.00045646229409612715, + "completion_length": 465.1250305175781, + "epoch": 0.027966101694915254, + "grad_norm": 0.017873238036622066, + "learning_rate": 5.593220338983051e-06, + "loss": 0.0246, + "num_tokens": 
290581.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 33 + }, + { + "clip_ratio": 0.0006314113852567971, + "epoch": 0.0288135593220339, + "grad_norm": 0.01732638271233714, + "learning_rate": 5.7627118644067805e-06, + "loss": 0.0247, + "step": 34 + }, + { + "clip_ratio": 0.00045800459338352084, + "epoch": 0.029661016949152543, + "grad_norm": 0.017593288926627842, + "learning_rate": 5.932203389830509e-06, + "loss": 0.0247, + "step": 35 + }, + { + "clip_ratio": 0.0004213759966660291, + "epoch": 0.030508474576271188, + "grad_norm": 0.017758527483606314, + "learning_rate": 6.1016949152542385e-06, + "loss": 0.0247, + "step": 36 + }, + { + "clip_ratio": 0.00027920620050281286, + "completion_length": 487.982177734375, + "epoch": 0.03135593220338983, + "grad_norm": 0.017492673426871806, + "learning_rate": 6.271186440677966e-06, + "loss": 0.0287, + "num_tokens": 325036.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 37 + }, + { + "clip_ratio": 0.0003654122701846063, + "epoch": 0.03220338983050847, + "grad_norm": 0.016942624524485753, + "learning_rate": 6.440677966101695e-06, + "loss": 0.0287, + "step": 38 + }, + { + "clip_ratio": 0.0002445173158776015, + "epoch": 0.03305084745762712, + "grad_norm": 0.017357366453315624, + "learning_rate": 6.610169491525424e-06, + "loss": 0.0287, + 
"step": 39 + }, + { + "clip_ratio": 0.00027939456049352884, + "epoch": 0.03389830508474576, + "grad_norm": 0.017497160548341977, + "learning_rate": 6.779661016949153e-06, + "loss": 0.0287, + "step": 40 + }, + { + "clip_ratio": 0.00030169120873324573, + "completion_length": 337.76788330078125, + "epoch": 0.03474576271186441, + "grad_norm": 0.013386997712677729, + "learning_rate": 6.949152542372882e-06, + "loss": 0.0194, + "num_tokens": 351879.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 41 + }, + { + "clip_ratio": 0.000481679366203025, + "epoch": 0.03559322033898305, + "grad_norm": 0.013534365829241167, + "learning_rate": 7.1186440677966106e-06, + "loss": 0.0194, + "step": 42 + }, + { + "clip_ratio": 0.0006071141688153148, + "epoch": 0.036440677966101696, + "grad_norm": 0.013688658779614732, + "learning_rate": 7.288135593220339e-06, + "loss": 0.0193, + "step": 43 + }, + { + "clip_ratio": 0.0005443710251711309, + "epoch": 0.03728813559322034, + "grad_norm": 0.013415623466192152, + "learning_rate": 7.4576271186440685e-06, + "loss": 0.0194, + "step": 44 + }, + { + "clip_ratio": 0.00027171947294846177, + "completion_length": 358.6964416503906, + "epoch": 0.038135593220338986, + "grad_norm": 0.0, + "learning_rate": 7.627118644067797e-06, + "loss": 0.0, + "num_tokens": 379414.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 45 + }, + { + 
"clip_ratio": 0.00027013494400307536, + "epoch": 0.03898305084745763, + "grad_norm": 0.0, + "learning_rate": 7.796610169491526e-06, + "loss": 0.0, + "step": 46 + }, + { + "clip_ratio": 0.00023684222833253443, + "epoch": 0.03983050847457627, + "grad_norm": 0.0, + "learning_rate": 7.966101694915255e-06, + "loss": 0.0, + "step": 47 + }, + { + "clip_ratio": 0.0004315820406191051, + "epoch": 0.04067796610169491, + "grad_norm": 0.0, + "learning_rate": 8.135593220338983e-06, + "loss": 0.0, + "step": 48 + }, + { + "clip_ratio": 0.00034640118246898055, + "completion_length": 392.46429443359375, + "epoch": 0.04152542372881356, + "grad_norm": 0.05155975490631469, + "learning_rate": 8.305084745762712e-06, + "loss": -0.023, + "num_tokens": 408424.0, + "reward": -0.8571429252624512, + "reward_std": 0.24888646602630615, + "rewards/check_winston_local_func/mean": -0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 49 + }, + { + "clip_ratio": 0.00034579477505758405, + "epoch": 0.0423728813559322, + "grad_norm": 0.051568368553185584, + "learning_rate": 8.47457627118644e-06, + "loss": -0.0233, + "step": 50 + }, + { + "clip_ratio": 0.0005872369511052966, + "epoch": 0.043220338983050846, + "grad_norm": 0.054569986775825835, + "learning_rate": 8.64406779661017e-06, + "loss": -0.0235, + "step": 51 + }, + { + "clip_ratio": 0.00048618926666677, + "epoch": 0.04406779661016949, + "grad_norm": 0.05573624590215382, + "learning_rate": 8.8135593220339e-06, + "loss": -0.0236, + "step": 52 + }, + { + "clip_ratio": 0.000333156727720052, + "completion_length": 485.7500305175781, + "epoch": 0.044915254237288135, + "grad_norm": 0.0, + "learning_rate": 8.983050847457628e-06, + "loss": 0.0, + "num_tokens": 442986.0, + "reward": -1.0, + "reward_std": 0.0, + 
"rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 53 + }, + { + "clip_ratio": 0.00042045177542604506, + "epoch": 0.04576271186440678, + "grad_norm": 0.0, + "learning_rate": 9.152542372881356e-06, + "loss": 0.0, + "step": 54 + }, + { + "clip_ratio": 0.00031678256345912814, + "epoch": 0.046610169491525424, + "grad_norm": 0.0, + "learning_rate": 9.322033898305085e-06, + "loss": 0.0, + "step": 55 + }, + { + "clip_ratio": 0.00010463170474395156, + "epoch": 0.04745762711864407, + "grad_norm": 0.0, + "learning_rate": 9.491525423728815e-06, + "loss": 0.0, + "step": 56 + }, + { + "clip_ratio": 0.0007074553286656737, + "completion_length": 428.3214416503906, + "epoch": 0.048305084745762714, + "grad_norm": 0.04153528214569023, + "learning_rate": 9.661016949152544e-06, + "loss": 0.0343, + "num_tokens": 473892.0, + "reward": -0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 57 + }, + { + "clip_ratio": 0.0004013319849036634, + "epoch": 0.04915254237288136, + "grad_norm": 0.04657277213309362, + "learning_rate": 9.830508474576272e-06, + "loss": 0.0342, + "step": 58 + }, + { + "clip_ratio": 0.00044179416727274656, + "epoch": 0.05, + "grad_norm": 0.045153415468062494, + "learning_rate": 1e-05, + "loss": 0.0343, + "step": 59 + }, + { + "clip_ratio": 0.0007794442353770137, + "epoch": 0.05084745762711865, + "grad_norm": 0.035363902861678634, + "learning_rate": 1.016949152542373e-05, + "loss": 0.0339, + "step": 60 + }, 
+ { + "clip_ratio": 0.00021712151647079736, + "completion_length": 299.8035888671875, + "epoch": 0.051694915254237285, + "grad_norm": 0.07205399219848665, + "learning_rate": 1.0338983050847458e-05, + "loss": 0.0477, + "num_tokens": 497465.0, + "reward": -0.785714328289032, + "reward_std": 0.28498074412345886, + "rewards/check_winston_local_func/mean": -0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241877675056458, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 61 + }, + { + "clip_ratio": 0.0002563712769187987, + "epoch": 0.05254237288135593, + "grad_norm": 0.07155354465871978, + "learning_rate": 1.0508474576271188e-05, + "loss": 0.0475, + "step": 62 + }, + { + "clip_ratio": 0.0001442718057660386, + "epoch": 0.053389830508474574, + "grad_norm": 0.07289445064494822, + "learning_rate": 1.0677966101694917e-05, + "loss": 0.0474, + "step": 63 + }, + { + "clip_ratio": 0.001116903149522841, + "epoch": 0.05423728813559322, + "grad_norm": 0.06596181254777028, + "learning_rate": 1.0847457627118645e-05, + "loss": 0.0468, + "step": 64 + }, + { + "clip_ratio": 0.00027901786961592734, + "completion_length": 480.4464416503906, + "epoch": 0.05508474576271186, + "grad_norm": 0.0, + "learning_rate": 1.1016949152542374e-05, + "loss": 0.0, + "num_tokens": 532266.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 65 + }, + { + "clip_ratio": 0.00037270825123414397, + "epoch": 0.05593220338983051, + "grad_norm": 0.0, + "learning_rate": 1.1186440677966102e-05, + "loss": 0.0, + "step": 66 + }, + { + "clip_ratio": 0.0006563978386111557, + 
"epoch": 0.05677966101694915, + "grad_norm": 0.0, + "learning_rate": 1.1355932203389833e-05, + "loss": 0.0, + "step": 67 + }, + { + "clip_ratio": 0.0008186621707864106, + "epoch": 0.0576271186440678, + "grad_norm": 0.0, + "learning_rate": 1.1525423728813561e-05, + "loss": 0.0, + "step": 68 + }, + { + "clip_ratio": 0.0005370522267185152, + "completion_length": 420.3214416503906, + "epoch": 0.05847457627118644, + "grad_norm": 0.0, + "learning_rate": 1.169491525423729e-05, + "loss": 0.0, + "num_tokens": 563380.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 69 + }, + { + "clip_ratio": 0.0007551547605544329, + "epoch": 0.059322033898305086, + "grad_norm": 0.0, + "learning_rate": 1.1864406779661018e-05, + "loss": 0.0, + "step": 70 + }, + { + "clip_ratio": 0.0004996137577109039, + "epoch": 0.06016949152542373, + "grad_norm": 0.0, + "learning_rate": 1.2033898305084745e-05, + "loss": 0.0, + "step": 71 + }, + { + "clip_ratio": 0.0007176484214141965, + "epoch": 0.061016949152542375, + "grad_norm": 0.0, + "learning_rate": 1.2203389830508477e-05, + "loss": 0.0, + "step": 72 + }, + { + "clip_ratio": 0.0004170738684479147, + "completion_length": 383.6964416503906, + "epoch": 0.06186440677966102, + "grad_norm": 0.01481240616851262, + "learning_rate": 1.2372881355932205e-05, + "loss": 0.0412, + "num_tokens": 592003.0, + "reward": -0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, 
+ "step": 73 + }, + { + "clip_ratio": 0.0008365331450477242, + "epoch": 0.06271186440677966, + "grad_norm": 0.01522897212214854, + "learning_rate": 1.2542372881355932e-05, + "loss": 0.0411, + "step": 74 + }, + { + "clip_ratio": 0.000981268472969532, + "epoch": 0.0635593220338983, + "grad_norm": 0.014948882448171377, + "learning_rate": 1.2711864406779661e-05, + "loss": 0.0411, + "step": 75 + }, + { + "clip_ratio": 0.0006704007391817868, + "epoch": 0.06440677966101695, + "grad_norm": 0.015045917131498382, + "learning_rate": 1.288135593220339e-05, + "loss": 0.041, + "step": 76 + }, + { + "clip_ratio": 0.00022424904454965144, + "completion_length": 437.9821472167969, + "epoch": 0.06525423728813559, + "grad_norm": 0.030968041587588573, + "learning_rate": 1.305084745762712e-05, + "loss": 0.0453, + "num_tokens": 623050.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 77 + }, + { + "clip_ratio": 0.00053448136895895, + "epoch": 0.06610169491525424, + "grad_norm": 0.02976001587219013, + "learning_rate": 1.3220338983050848e-05, + "loss": 0.0453, + "step": 78 + }, + { + "clip_ratio": 0.0010130176087841392, + "epoch": 0.06694915254237288, + "grad_norm": 0.02743385432574901, + "learning_rate": 1.3389830508474577e-05, + "loss": 0.045, + "step": 79 + }, + { + "clip_ratio": 0.0011749044060707092, + "epoch": 0.06779661016949153, + "grad_norm": 0.025462048937107604, + "learning_rate": 1.3559322033898305e-05, + "loss": 0.045, + "step": 80 + }, + { + "clip_ratio": 0.001996266655623913, + "completion_length": 382.2321472167969, + "epoch": 0.06864406779661017, + "grad_norm": 0.13457631329414246, + "learning_rate": 1.3728813559322034e-05, + 
"loss": 0.0135, + "num_tokens": 651839.0, + "reward": -0.6785714626312256, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": -0.6785714030265808, + "rewards/check_winston_local_func/std": 0.7411819696426392, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 81 + }, + { + "clip_ratio": 0.003203267464414239, + "epoch": 0.06949152542372881, + "grad_norm": 0.11807541511453928, + "learning_rate": 1.3898305084745764e-05, + "loss": 0.0128, + "step": 82 + }, + { + "clip_ratio": 0.011069249361753464, + "epoch": 0.07033898305084746, + "grad_norm": 0.0768781703261771, + "learning_rate": 1.4067796610169493e-05, + "loss": 0.0118, + "step": 83 + }, + { + "clip_ratio": 0.013229678384959698, + "epoch": 0.0711864406779661, + "grad_norm": 0.07925229229917279, + "learning_rate": 1.4237288135593221e-05, + "loss": 0.011, + "step": 84 + }, + { + "clip_ratio": 0.0002107896434608847, + "completion_length": 397.1964416503906, + "epoch": 0.07203389830508475, + "grad_norm": 0.0461083173277337, + "learning_rate": 1.440677966101695e-05, + "loss": 0.0389, + "num_tokens": 681218.0, + "reward": -0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 85 + }, + { + "clip_ratio": 0.0010596371721476316, + "epoch": 0.07288135593220339, + "grad_norm": 0.04449467794694347, + "learning_rate": 1.4576271186440678e-05, + "loss": 0.0384, + "step": 86 + }, + { + "clip_ratio": 0.002870997181162238, + "epoch": 0.07372881355932204, + "grad_norm": 0.038978879976910054, + "learning_rate": 1.4745762711864408e-05, + 
"loss": 0.038, + "step": 87 + }, + { + "clip_ratio": 0.006624125875532627, + "epoch": 0.07457627118644068, + "grad_norm": 0.0364842012372814, + "learning_rate": 1.4915254237288137e-05, + "loss": 0.0377, + "step": 88 + }, + { + "clip_ratio": 0.00043057286529801786, + "completion_length": 399.64288330078125, + "epoch": 0.07542372881355933, + "grad_norm": 0.014090924578944663, + "learning_rate": 1.5084745762711865e-05, + "loss": 0.0328, + "num_tokens": 711078.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 89 + }, + { + "clip_ratio": 0.0018296982161700726, + "epoch": 0.07627118644067797, + "grad_norm": 0.014531205963070252, + "learning_rate": 1.5254237288135594e-05, + "loss": 0.0328, + "step": 90 + }, + { + "clip_ratio": 0.004530549980700016, + "epoch": 0.07711864406779662, + "grad_norm": 0.014754831265979268, + "learning_rate": 1.5423728813559326e-05, + "loss": 0.0327, + "step": 91 + }, + { + "clip_ratio": 0.008132151328027248, + "epoch": 0.07796610169491526, + "grad_norm": 0.014608619166449479, + "learning_rate": 1.5593220338983053e-05, + "loss": 0.0326, + "step": 92 + }, + { + "clip_ratio": 0.0007373582920990884, + "completion_length": 467.71429443359375, + "epoch": 0.0788135593220339, + "grad_norm": 0.041297580984419976, + "learning_rate": 1.576271186440678e-05, + "loss": 0.0616, + "num_tokens": 745862.0, + "reward": -0.8928571939468384, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 93 + }, + { + "clip_ratio": 0.001287531922571361, + "epoch": 0.07966101694915254, + "grad_norm": 0.030858648065290283, + "learning_rate": 1.593220338983051e-05, + "loss": 0.0614, + "step": 94 + }, + { + "clip_ratio": 0.0023924303241074085, + "epoch": 0.08050847457627118, + "grad_norm": 0.03463914321182917, + "learning_rate": 1.6101694915254237e-05, + "loss": 0.0613, + "step": 95 + }, + { + "clip_ratio": 0.00350037869066, + "epoch": 0.08135593220338982, + "grad_norm": 0.02665011286164521, + "learning_rate": 1.6271186440677967e-05, + "loss": 0.0611, + "step": 96 + }, + { + "clip_ratio": 0.0006918495637364686, + "completion_length": 320.75, + "epoch": 0.08220338983050847, + "grad_norm": 0.06373891470490567, + "learning_rate": 1.6440677966101697e-05, + "loss": -0.015, + "num_tokens": 771576.0, + "reward": -0.7500000596046448, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 97 + }, + { + "clip_ratio": 0.0029753418639302254, + "epoch": 0.08305084745762711, + "grad_norm": 0.05523249333421511, + "learning_rate": 1.6610169491525424e-05, + "loss": -0.0157, + "step": 98 + }, + { + "clip_ratio": 0.00716389948502183, + "epoch": 0.08389830508474576, + "grad_norm": 0.04924083222576615, + "learning_rate": 1.6779661016949154e-05, + "loss": -0.0158, + "step": 99 + }, + { + "clip_ratio": 0.011036296375095844, + "epoch": 0.0847457627118644, + "grad_norm": 0.04955323333773024, + "learning_rate": 1.694915254237288e-05, + "loss": -0.0163, + "step": 100 + }, + { + "clip_ratio": 0.00038607188616879284, + "completion_length": 507.2500305175781, + "epoch": 0.08559322033898305, + "grad_norm": 
0.0, + "learning_rate": 1.711864406779661e-05, + "loss": 0.0, + "num_tokens": 807230.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 101 + }, + { + "clip_ratio": 0.0004233713843859732, + "epoch": 0.08644067796610169, + "grad_norm": 0.0, + "learning_rate": 1.728813559322034e-05, + "loss": 0.0, + "step": 102 + }, + { + "clip_ratio": 0.0005304253427311778, + "epoch": 0.08728813559322034, + "grad_norm": 0.0, + "learning_rate": 1.745762711864407e-05, + "loss": 0.0, + "step": 103 + }, + { + "clip_ratio": 0.0008094432414509356, + "epoch": 0.08813559322033898, + "grad_norm": 0.0, + "learning_rate": 1.76271186440678e-05, + "loss": 0.0, + "step": 104 + }, + { + "clip_ratio": 0.0003136220038868487, + "completion_length": 309.4821472167969, + "epoch": 0.08898305084745763, + "grad_norm": 0.1215376293190595, + "learning_rate": 1.7796610169491526e-05, + "loss": 0.059, + "num_tokens": 830873.0, + "reward": -0.6071428656578064, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.6071428656578064, + "rewards/check_winston_local_func/std": 0.8017837405204773, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 105 + }, + { + "clip_ratio": 0.005680752452462912, + "epoch": 0.08983050847457627, + "grad_norm": 0.08882976004122672, + "learning_rate": 1.7966101694915256e-05, + "loss": 0.057, + "step": 106 + }, + { + "clip_ratio": 0.013865095563232899, + "epoch": 0.09067796610169492, + "grad_norm": 0.07178187465318808, + "learning_rate": 1.8135593220338986e-05, + "loss": 0.0551, + "step": 107 + }, + { + "clip_ratio": 
0.025337526574730873, + "epoch": 0.09152542372881356, + "grad_norm": 0.05889114052835241, + "learning_rate": 1.8305084745762713e-05, + "loss": 0.054, + "step": 108 + }, + { + "clip_ratio": 0.0004973930190317333, + "completion_length": 309.2857360839844, + "epoch": 0.0923728813559322, + "grad_norm": 0.10159993090017184, + "learning_rate": 1.8474576271186443e-05, + "loss": 0.1029, + "num_tokens": 856689.0, + "reward": -0.7500000596046448, + "reward_std": 0.4123912453651428, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 109 + }, + { + "clip_ratio": 0.005504293367266655, + "epoch": 0.09322033898305085, + "grad_norm": 0.09390182129772277, + "learning_rate": 1.864406779661017e-05, + "loss": 0.1017, + "step": 110 + }, + { + "clip_ratio": 0.022907190024852753, + "epoch": 0.0940677966101695, + "grad_norm": 0.08701453983072766, + "learning_rate": 1.88135593220339e-05, + "loss": 0.0999, + "step": 111 + }, + { + "clip_ratio": 0.04514092579483986, + "epoch": 0.09491525423728814, + "grad_norm": 0.08477253768734147, + "learning_rate": 1.898305084745763e-05, + "loss": 0.0987, + "step": 112 + }, + { + "clip_ratio": 0.0005664547788910568, + "completion_length": 434.39288330078125, + "epoch": 0.09576271186440678, + "grad_norm": 0.0, + "learning_rate": 1.9152542372881357e-05, + "loss": 0.0, + "num_tokens": 888255.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 113 + }, + { + "clip_ratio": 0.0015907255001366138, + "epoch": 
0.09661016949152543, + "grad_norm": 0.0, + "learning_rate": 1.9322033898305087e-05, + "loss": 0.0, + "step": 114 + }, + { + "clip_ratio": 0.003365863347426057, + "epoch": 0.09745762711864407, + "grad_norm": 0.0, + "learning_rate": 1.9491525423728814e-05, + "loss": 0.0, + "step": 115 + }, + { + "clip_ratio": 0.006915883626788855, + "epoch": 0.09830508474576272, + "grad_norm": 0.0, + "learning_rate": 1.9661016949152545e-05, + "loss": 0.0, + "step": 116 + }, + { + "clip_ratio": 0.0015928384382277727, + "completion_length": 311.08929443359375, + "epoch": 0.09915254237288136, + "grad_norm": 0.1669528890016949, + "learning_rate": 1.9830508474576275e-05, + "loss": 0.0592, + "num_tokens": 912948.0, + "reward": -0.785714328289032, + "reward_std": 0.28498074412345886, + "rewards/check_winston_local_func/mean": -0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241877675056458, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 117 + }, + { + "clip_ratio": 0.006070761010050774, + "epoch": 0.1, + "grad_norm": 0.15701074253607375, + "learning_rate": 2e-05, + "loss": 0.056, + "step": 118 + }, + { + "clip_ratio": 0.03282368928194046, + "epoch": 0.10084745762711865, + "grad_norm": 0.21942626154682726, + "learning_rate": 2.016949152542373e-05, + "loss": 0.0526, + "step": 119 + }, + { + "clip_ratio": 0.0628986731171608, + "epoch": 0.1016949152542373, + "grad_norm": 0.1568339023062343, + "learning_rate": 2.033898305084746e-05, + "loss": 0.0497, + "step": 120 + }, + { + "clip_ratio": 0.0003240547957830131, + "completion_length": 490.607177734375, + "epoch": 0.10254237288135593, + "grad_norm": 0.0, + "learning_rate": 2.0508474576271186e-05, + "loss": 0.0, + "num_tokens": 947318.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 121 + }, + { + "clip_ratio": 0.00037375700776465237, + "epoch": 0.10338983050847457, + "grad_norm": 0.0, + "learning_rate": 2.0677966101694916e-05, + "loss": 0.0, + "step": 122 + }, + { + "clip_ratio": 0.0011371899163350463, + "epoch": 0.10423728813559321, + "grad_norm": 0.0, + "learning_rate": 2.084745762711865e-05, + "loss": 0.0, + "step": 123 + }, + { + "clip_ratio": 0.0022452734410762787, + "epoch": 0.10508474576271186, + "grad_norm": 0.0, + "learning_rate": 2.1016949152542376e-05, + "loss": 0.0, + "step": 124 + }, + { + "clip_ratio": 0.004924725275486708, + "completion_length": 324.58929443359375, + "epoch": 0.1059322033898305, + "grad_norm": 0.3997089536055672, + "learning_rate": 2.1186440677966103e-05, + "loss": 0.04, + "num_tokens": 972527.0, + "reward": -0.8214285969734192, + "reward_std": 0.36553531885147095, + "rewards/check_winston_local_func/mean": -0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 125 + }, + { + "clip_ratio": 0.036066196858882904, + "epoch": 0.10677966101694915, + "grad_norm": 0.4003737832223874, + "learning_rate": 2.1355932203389833e-05, + "loss": 0.0371, + "step": 126 + }, + { + "clip_ratio": 0.06804865598678589, + "epoch": 0.10762711864406779, + "grad_norm": 0.3262616499772286, + "learning_rate": 2.152542372881356e-05, + "loss": 0.0328, + "step": 127 + }, + { + "clip_ratio": 0.08261267095804214, + "epoch": 0.10847457627118644, + "grad_norm": 0.19475445080797668, + "learning_rate": 2.169491525423729e-05, + "loss": 0.0284, + "step": 128 + }, + { + "clip_ratio": 0.00042747953557409346, + "completion_length": 
441.6785888671875, + "epoch": 0.10932203389830508, + "grad_norm": 0.07121815374577634, + "learning_rate": 2.1864406779661017e-05, + "loss": 0.0215, + "num_tokens": 1005157.0, + "reward": -0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 129 + }, + { + "clip_ratio": 0.0005996564286760986, + "epoch": 0.11016949152542373, + "grad_norm": 0.07374447574020743, + "learning_rate": 2.2033898305084748e-05, + "loss": 0.021, + "step": 130 + }, + { + "clip_ratio": 0.0070611475966870785, + "epoch": 0.11101694915254237, + "grad_norm": 0.0484843694410488, + "learning_rate": 2.2203389830508474e-05, + "loss": 0.02, + "step": 131 + }, + { + "clip_ratio": 0.02419929951429367, + "epoch": 0.11186440677966102, + "grad_norm": 0.03734227928764934, + "learning_rate": 2.2372881355932205e-05, + "loss": 0.0194, + "step": 132 + }, + { + "clip_ratio": 0.0008097242680378258, + "completion_length": 299.9285888671875, + "epoch": 0.11271186440677966, + "grad_norm": 0.2037296860020652, + "learning_rate": 2.2542372881355935e-05, + "loss": 0.0123, + "num_tokens": 1029577.0, + "reward": -0.4285714626312256, + "reward_std": 0.49777287244796753, + "rewards/check_winston_local_func/mean": -0.4285714328289032, + "rewards/check_winston_local_func/std": 0.9116845726966858, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 133 + }, + { + "clip_ratio": 0.010970565490424633, + "epoch": 0.1135593220338983, + "grad_norm": 0.15172018259613887, + "learning_rate": 2.2711864406779665e-05, + "loss": 0.0095, + "step": 134 + }, + { + "clip_ratio": 
0.027290966361761093, + "epoch": 0.11440677966101695, + "grad_norm": 0.14632003828933562, + "learning_rate": 2.2881355932203392e-05, + "loss": 0.0066, + "step": 135 + }, + { + "clip_ratio": 0.04884405434131622, + "epoch": 0.1152542372881356, + "grad_norm": 0.13010992493757564, + "learning_rate": 2.3050847457627122e-05, + "loss": 0.0037, + "step": 136 + }, + { + "clip_ratio": 0.00016204381245188415, + "completion_length": 397.9821472167969, + "epoch": 0.11610169491525424, + "grad_norm": 0.0819715923540025, + "learning_rate": 2.322033898305085e-05, + "loss": 0.0348, + "num_tokens": 1059368.0, + "reward": -0.7500000596046448, + "reward_std": 0.3499017357826233, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 137 + }, + { + "clip_ratio": 0.0012223825324326754, + "epoch": 0.11694915254237288, + "grad_norm": 0.07970324522981491, + "learning_rate": 2.338983050847458e-05, + "loss": 0.0336, + "step": 138 + }, + { + "clip_ratio": 0.015393489971756935, + "epoch": 0.11779661016949153, + "grad_norm": 0.07570693688371119, + "learning_rate": 2.3559322033898306e-05, + "loss": 0.0321, + "step": 139 + }, + { + "clip_ratio": 0.07253921031951904, + "epoch": 0.11864406779661017, + "grad_norm": 0.05800544884381334, + "learning_rate": 2.3728813559322036e-05, + "loss": 0.0305, + "step": 140 + }, + { + "clip_ratio": 0.00020609110652003437, + "completion_length": 376.3035888671875, + "epoch": 0.11949152542372882, + "grad_norm": 0.16488571125022886, + "learning_rate": 2.3898305084745763e-05, + "loss": -0.0156, + "num_tokens": 1088561.0, + "reward": -0.5, + "reward_std": 0.686587929725647, + "rewards/check_winston_local_func/mean": -0.5, + "rewards/check_winston_local_func/std": 0.8738628625869751, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 141 + }, + { + "clip_ratio": 0.020974619314074516, + "epoch": 0.12033898305084746, + "grad_norm": 0.12033253885509411, + "learning_rate": 2.406779661016949e-05, + "loss": -0.02, + "step": 142 + }, + { + "clip_ratio": 0.14757588505744934, + "epoch": 0.1211864406779661, + "grad_norm": 0.18906094003962706, + "learning_rate": 2.4237288135593224e-05, + "loss": -0.0215, + "step": 143 + }, + { + "clip_ratio": 0.18001240491867065, + "epoch": 0.12203389830508475, + "grad_norm": 0.2094330456679022, + "learning_rate": 2.4406779661016954e-05, + "loss": -0.0238, + "step": 144 + }, + { + "clip_ratio": 0.0010827317601069808, + "completion_length": 216.85714721679688, + "epoch": 0.1228813559322034, + "grad_norm": 0.22593574409537565, + "learning_rate": 2.457627118644068e-05, + "loss": -0.057, + "num_tokens": 1107713.0, + "reward": -0.0357142873108387, + "reward_std": 0.808063805103302, + "rewards/check_winston_local_func/mean": -0.0357142873108387, + "rewards/check_winston_local_func/std": 1.0084062814712524, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 145 + }, + { + "clip_ratio": 0.01685175858438015, + "epoch": 0.12372881355932204, + "grad_norm": 0.21920453847219976, + "learning_rate": 2.474576271186441e-05, + "loss": -0.0622, + "step": 146 + }, + { + "clip_ratio": 0.05698274075984955, + "epoch": 0.12457627118644068, + "grad_norm": 0.23790061749019706, + "learning_rate": 2.4915254237288138e-05, + "loss": -0.0672, + "step": 147 + }, + { + "clip_ratio": 0.06983836740255356, + "epoch": 0.12542372881355932, + "grad_norm": 0.19359662720887325, + "learning_rate": 2.5084745762711865e-05, + "loss": -0.0724, + "step": 148 + }, + { + 
"clip_ratio": 0.0013232758501544595, + "completion_length": 251.96429443359375, + "epoch": 0.12627118644067797, + "grad_norm": 0.27961740628458276, + "learning_rate": 2.5254237288135595e-05, + "loss": 0.06, + "num_tokens": 1129487.0, + "reward": -0.0357142873108387, + "reward_std": 0.9462584257125854, + "rewards/check_winston_local_func/mean": -0.0357142873108387, + "rewards/check_winston_local_func/std": 1.0084062814712524, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 149 + }, + { + "clip_ratio": 0.03364234417676926, + "epoch": 0.1271186440677966, + "grad_norm": 0.19276991014072303, + "learning_rate": 2.5423728813559322e-05, + "loss": 0.054, + "step": 150 + }, + { + "clip_ratio": 0.1430949568748474, + "epoch": 0.12796610169491526, + "grad_norm": 0.2768368269508983, + "learning_rate": 2.5593220338983052e-05, + "loss": 0.0518, + "step": 151 + }, + { + "clip_ratio": 0.16415317356586456, + "epoch": 0.1288135593220339, + "grad_norm": 0.25743304440606246, + "learning_rate": 2.576271186440678e-05, + "loss": 0.0475, + "step": 152 + }, + { + "clip_ratio": 0.0013469145633280277, + "completion_length": 204.48214721679688, + "epoch": 0.12966101694915255, + "grad_norm": 0.28188012404317475, + "learning_rate": 2.5932203389830512e-05, + "loss": 0.0527, + "num_tokens": 1148354.0, + "reward": 0.1428571492433548, + "reward_std": 0.7129831910133362, + "rewards/check_winston_local_func/mean": 0.1428571492433548, + "rewards/check_winston_local_func/std": 0.9987004995346069, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 153 + }, + { + "clip_ratio": 0.016695290803909302, + "epoch": 0.13050847457627118, + "grad_norm": 0.2641379759457116, + "learning_rate": 2.610169491525424e-05, + 
"loss": 0.0473, + "step": 154 + }, + { + "clip_ratio": 0.05237039551138878, + "epoch": 0.13135593220338984, + "grad_norm": 0.20691108630731772, + "learning_rate": 2.627118644067797e-05, + "loss": 0.0414, + "step": 155 + }, + { + "clip_ratio": 0.0867982804775238, + "epoch": 0.13220338983050847, + "grad_norm": 0.15341544674011254, + "learning_rate": 2.6440677966101696e-05, + "loss": 0.0351, + "step": 156 + }, + { + "clip_ratio": 0.0006545564392581582, + "completion_length": 233.9285888671875, + "epoch": 0.13305084745762713, + "grad_norm": 0.16036976523795443, + "learning_rate": 2.6610169491525427e-05, + "loss": 0.0179, + "num_tokens": 1168622.0, + "reward": 0.7142857313156128, + "reward_std": 0.4016071856021881, + "rewards/check_winston_local_func/mean": 0.7142857313156128, + "rewards/check_winston_local_func/std": 0.7061878442764282, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 157 + }, + { + "clip_ratio": 0.00993060227483511, + "epoch": 0.13389830508474576, + "grad_norm": 0.1298083776077636, + "learning_rate": 2.6779661016949153e-05, + "loss": 0.0151, + "step": 158 + }, + { + "clip_ratio": 0.0733163133263588, + "epoch": 0.13474576271186442, + "grad_norm": 0.11590218855503849, + "learning_rate": 2.6949152542372884e-05, + "loss": 0.0125, + "step": 159 + }, + { + "clip_ratio": 0.14935636520385742, + "epoch": 0.13559322033898305, + "grad_norm": 0.16154268567658825, + "learning_rate": 2.711864406779661e-05, + "loss": 0.011, + "step": 160 + }, + { + "clip_ratio": 0.0009650280699133873, + "completion_length": 174.7678680419922, + "epoch": 0.13644067796610168, + "grad_norm": 0.15404950919743313, + "learning_rate": 2.728813559322034e-05, + "loss": 0.0078, + "num_tokens": 1185697.0, + "reward": 0.8571429252624512, + "reward_std": 0.3342905640602112, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + 
"rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 161 + }, + { + "clip_ratio": 0.004791476763784885, + "epoch": 0.13728813559322034, + "grad_norm": 0.12682344230599282, + "learning_rate": 2.7457627118644068e-05, + "loss": 0.0056, + "step": 162 + }, + { + "clip_ratio": 0.023417560383677483, + "epoch": 0.13813559322033897, + "grad_norm": 0.0948693079603576, + "learning_rate": 2.76271186440678e-05, + "loss": 0.003, + "step": 163 + }, + { + "clip_ratio": 0.07911951839923859, + "epoch": 0.13898305084745763, + "grad_norm": 0.09089932231497586, + "learning_rate": 2.7796610169491528e-05, + "loss": 0.0007, + "step": 164 + }, + { + "clip_ratio": 0.000979878008365631, + "completion_length": 126.64286041259766, + "epoch": 0.13983050847457626, + "grad_norm": 0.1801163708005843, + "learning_rate": 2.7966101694915258e-05, + "loss": -0.0396, + "num_tokens": 1199565.0, + "reward": 0.7500000596046448, + "reward_std": 0.3859959840774536, + "rewards/check_winston_local_func/mean": 0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 165 + }, + { + "clip_ratio": 0.009913308545947075, + "epoch": 0.14067796610169492, + "grad_norm": 0.14588220837158195, + "learning_rate": 2.8135593220338985e-05, + "loss": -0.0428, + "step": 166 + }, + { + "clip_ratio": 0.07110879570245743, + "epoch": 0.14152542372881355, + "grad_norm": 0.276973278154756, + "learning_rate": 2.8305084745762715e-05, + "loss": -0.0441, + "step": 167 + }, + { + "clip_ratio": 0.06909574568271637, + "epoch": 0.1423728813559322, + "grad_norm": 0.12488402451050255, + "learning_rate": 2.8474576271186442e-05, + 
"loss": -0.0494, + "step": 168 + }, + { + "clip_ratio": 0.0003819709818344563, + "completion_length": 152.73214721679688, + "epoch": 0.14322033898305084, + "grad_norm": 0.3195642927880649, + "learning_rate": 2.8644067796610172e-05, + "loss": 0.0302, + "num_tokens": 1214790.0, + "reward": 0.7142857313156128, + "reward_std": 0.3342905342578888, + "rewards/check_winston_local_func/mean": 0.7142857313156128, + "rewards/check_winston_local_func/std": 0.7061878442764282, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 169 + }, + { + "clip_ratio": 0.015475978143513203, + "epoch": 0.1440677966101695, + "grad_norm": 0.2360393211362849, + "learning_rate": 2.88135593220339e-05, + "loss": 0.0228, + "step": 170 + }, + { + "clip_ratio": 0.08493895828723907, + "epoch": 0.14491525423728813, + "grad_norm": 0.17350104363138513, + "learning_rate": 2.8983050847457626e-05, + "loss": 0.0163, + "step": 171 + }, + { + "clip_ratio": 0.14768318831920624, + "epoch": 0.14576271186440679, + "grad_norm": 0.19569281232532856, + "learning_rate": 2.9152542372881356e-05, + "loss": 0.013, + "step": 172 + }, + { + "clip_ratio": 0.006150017958134413, + "completion_length": 186.25001525878906, + "epoch": 0.14661016949152542, + "grad_norm": 0.06068449124289285, + "learning_rate": 2.932203389830509e-05, + "loss": -0.0169, + "num_tokens": 1232564.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 173 + }, + { + "clip_ratio": 0.014521388337016106, + "epoch": 0.14745762711864407, + "grad_norm": 0.05994459441740582, + 
"learning_rate": 2.9491525423728817e-05, + "loss": -0.0174, + "step": 174 + }, + { + "clip_ratio": 0.04354570060968399, + "epoch": 0.1483050847457627, + "grad_norm": 0.06278027199945566, + "learning_rate": 2.9661016949152547e-05, + "loss": -0.0183, + "step": 175 + }, + { + "clip_ratio": 0.10504651814699173, + "epoch": 0.14915254237288136, + "grad_norm": 0.04416483226500781, + "learning_rate": 2.9830508474576274e-05, + "loss": -0.0193, + "step": 176 + }, + { + "clip_ratio": 0.003162125591188669, + "completion_length": 163.17857360839844, + "epoch": 0.15, + "grad_norm": 0.11359153510598317, + "learning_rate": 3.0000000000000004e-05, + "loss": -0.0335, + "num_tokens": 1248550.0, + "reward": 0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 177 + }, + { + "clip_ratio": 0.01111722644418478, + "epoch": 0.15084745762711865, + "grad_norm": 0.10416258904679447, + "learning_rate": 3.016949152542373e-05, + "loss": -0.0347, + "step": 178 + }, + { + "clip_ratio": 0.04117439687252045, + "epoch": 0.15169491525423728, + "grad_norm": 0.08204255975558637, + "learning_rate": 3.0338983050847458e-05, + "loss": -0.0364, + "step": 179 + }, + { + "clip_ratio": 0.08657827973365784, + "epoch": 0.15254237288135594, + "grad_norm": 0.08178448057500348, + "learning_rate": 3.0508474576271188e-05, + "loss": -0.038, + "step": 180 + }, + { + "clip_ratio": 0.010199248790740967, + "completion_length": 162.35714721679688, + "epoch": 0.15338983050847457, + "grad_norm": 0.4819050859019718, + "learning_rate": 3.067796610169492e-05, + "loss": 0.06, + "num_tokens": 1264994.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + 
"rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 181 + }, + { + "clip_ratio": 0.060436759144067764, + "epoch": 0.15423728813559323, + "grad_norm": 0.1959898799735129, + "learning_rate": 3.084745762711865e-05, + "loss": 0.0533, + "step": 182 + }, + { + "clip_ratio": 0.13463598489761353, + "epoch": 0.15508474576271186, + "grad_norm": 0.12678282333898375, + "learning_rate": 3.101694915254238e-05, + "loss": 0.0482, + "step": 183 + }, + { + "clip_ratio": 0.19176946580410004, + "epoch": 0.15593220338983052, + "grad_norm": 0.10756609820315277, + "learning_rate": 3.1186440677966106e-05, + "loss": 0.0463, + "step": 184 + }, + { + "clip_ratio": 0.0008241009199991822, + "completion_length": 237.60714721679688, + "epoch": 0.15677966101694915, + "grad_norm": 0.0, + "learning_rate": 3.135593220338983e-05, + "loss": 0.0, + "num_tokens": 1286164.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 185 + }, + { + "clip_ratio": 0.002994579030200839, + "epoch": 0.1576271186440678, + "grad_norm": 0.0, + "learning_rate": 3.152542372881356e-05, + "loss": 0.0, + "step": 186 + }, + { + "clip_ratio": 0.00574068445712328, + "epoch": 0.15847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.169491525423729e-05, + "loss": 0.0, + "step": 187 + }, + { + "clip_ratio": 0.012791804037988186, + "epoch": 0.15932203389830507, + "grad_norm": 0.0, + "learning_rate": 3.186440677966102e-05, + "loss": 0.0, + "step": 188 + }, + { + "clip_ratio": 
0.006764067802578211, + "completion_length": 143.94644165039062, + "epoch": 0.16016949152542373, + "grad_norm": 0.04704135237627796, + "learning_rate": 3.203389830508475e-05, + "loss": -0.0095, + "num_tokens": 1301409.0, + "reward": 0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 189 + }, + { + "clip_ratio": 0.013139193877577782, + "epoch": 0.16101694915254236, + "grad_norm": 0.04532372769932697, + "learning_rate": 3.2203389830508473e-05, + "loss": -0.0098, + "step": 190 + }, + { + "clip_ratio": 0.03423069044947624, + "epoch": 0.16186440677966102, + "grad_norm": 0.040646403971755785, + "learning_rate": 3.237288135593221e-05, + "loss": -0.0105, + "step": 191 + }, + { + "clip_ratio": 0.06455554068088531, + "epoch": 0.16271186440677965, + "grad_norm": 0.03643001220928061, + "learning_rate": 3.2542372881355934e-05, + "loss": -0.0113, + "step": 192 + }, + { + "clip_ratio": 0.0007823093910701573, + "completion_length": 229.9107208251953, + "epoch": 0.1635593220338983, + "grad_norm": 0.0, + "learning_rate": 3.271186440677967e-05, + "loss": 0.0, + "num_tokens": 1321708.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 193 + }, + { + "clip_ratio": 0.0008988279732875526, + "epoch": 0.16440677966101694, + "grad_norm": 0.0, + "learning_rate": 3.2881355932203394e-05, + "loss": 0.0, + "step": 194 + }, + { + "clip_ratio": 0.003465626621618867, + "epoch": 
0.1652542372881356, + "grad_norm": 0.0, + "learning_rate": 3.305084745762712e-05, + "loss": 0.0, + "step": 195 + }, + { + "clip_ratio": 0.008655412122607231, + "epoch": 0.16610169491525423, + "grad_norm": 0.0, + "learning_rate": 3.322033898305085e-05, + "loss": 0.0, + "step": 196 + }, + { + "clip_ratio": 0.0021059864666312933, + "completion_length": 166.85714721679688, + "epoch": 0.1669491525423729, + "grad_norm": 0.17307734331449404, + "learning_rate": 3.338983050847458e-05, + "loss": -0.0036, + "num_tokens": 1338540.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 197 + }, + { + "clip_ratio": 0.009274979121983051, + "epoch": 0.16779661016949152, + "grad_norm": 0.10860266060182006, + "learning_rate": 3.355932203389831e-05, + "loss": -0.0066, + "step": 198 + }, + { + "clip_ratio": 0.03715561330318451, + "epoch": 0.16864406779661018, + "grad_norm": 0.09136703784102146, + "learning_rate": 3.3728813559322035e-05, + "loss": -0.008, + "step": 199 + }, + { + "clip_ratio": 0.06759678572416306, + "epoch": 0.1694915254237288, + "grad_norm": 0.08121070179066665, + "learning_rate": 3.389830508474576e-05, + "loss": -0.009, + "step": 200 + }, + { + "clip_ratio": 0.0002369106950936839, + "completion_length": 132.625, + "epoch": 0.17033898305084746, + "grad_norm": 0.12222790896016958, + "learning_rate": 3.406779661016949e-05, + "loss": 0.0931, + "num_tokens": 1352735.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + 
"rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 201 + }, + { + "clip_ratio": 0.0015077884308993816, + "epoch": 0.1711864406779661, + "grad_norm": 0.12028738542362348, + "learning_rate": 3.423728813559322e-05, + "loss": 0.0915, + "step": 202 + }, + { + "clip_ratio": 0.011990153230726719, + "epoch": 0.17203389830508475, + "grad_norm": 0.10639183565121645, + "learning_rate": 3.4406779661016956e-05, + "loss": 0.0873, + "step": 203 + }, + { + "clip_ratio": 0.05813857913017273, + "epoch": 0.17288135593220338, + "grad_norm": 0.08983262526351615, + "learning_rate": 3.457627118644068e-05, + "loss": 0.0833, + "step": 204 + }, + { + "clip_ratio": 0.001714512356556952, + "completion_length": 90.37500762939453, + "epoch": 0.17372881355932204, + "grad_norm": 0.10843637606790192, + "learning_rate": 3.474576271186441e-05, + "loss": 0.0164, + "num_tokens": 1365892.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 205 + }, + { + "clip_ratio": 0.017641481012105942, + "epoch": 0.17457627118644067, + "grad_norm": 0.08303991648667351, + "learning_rate": 3.491525423728814e-05, + "loss": 0.0148, + "step": 206 + }, + { + "clip_ratio": 0.15765391290187836, + "epoch": 0.17542372881355933, + "grad_norm": 0.07279655924549996, + "learning_rate": 3.5084745762711864e-05, + "loss": 0.0138, + "step": 207 + }, + { + "clip_ratio": 0.2804856598377228, + "epoch": 0.17627118644067796, + "grad_norm": 0.09315271598947107, + "learning_rate": 3.52542372881356e-05, + "loss": 0.0135, + "step": 208 + }, + { + "clip_ratio": 0.003159541869536042, + "completion_length": 
67.30357360839844, + "epoch": 0.17711864406779662, + "grad_norm": 0.0, + "learning_rate": 3.5423728813559324e-05, + "loss": 0.0, + "num_tokens": 1376973.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 209 + }, + { + "clip_ratio": 0.004362096078693867, + "epoch": 0.17796610169491525, + "grad_norm": 0.0, + "learning_rate": 3.559322033898305e-05, + "loss": 0.0, + "step": 210 + }, + { + "clip_ratio": 0.01770210638642311, + "epoch": 0.1788135593220339, + "grad_norm": 0.0, + "learning_rate": 3.576271186440678e-05, + "loss": 0.0, + "step": 211 + }, + { + "clip_ratio": 0.035751208662986755, + "epoch": 0.17966101694915254, + "grad_norm": 0.0, + "learning_rate": 3.593220338983051e-05, + "loss": 0.0, + "step": 212 + }, + { + "clip_ratio": 0.0035622839350253344, + "completion_length": 65.08928680419922, + "epoch": 0.1805084745762712, + "grad_norm": 0.0, + "learning_rate": 3.610169491525424e-05, + "loss": 0.0, + "num_tokens": 1387570.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 213 + }, + { + "clip_ratio": 0.0025523039512336254, + "epoch": 0.18135593220338983, + "grad_norm": 0.0, + "learning_rate": 3.627118644067797e-05, + "loss": 0.0, + "step": 214 + }, + { + "clip_ratio": 0.005835308227688074, + "epoch": 0.18220338983050846, + "grad_norm": 0.0, + "learning_rate": 3.64406779661017e-05, + "loss": 0.0, + "step": 215 + }, + { + "clip_ratio": 0.011904297396540642, + "epoch": 0.18305084745762712, + "grad_norm": 
0.0, + "learning_rate": 3.6610169491525426e-05, + "loss": 0.0, + "step": 216 + }, + { + "clip_ratio": 0.0003092146071139723, + "completion_length": 51.10714340209961, + "epoch": 0.18389830508474575, + "grad_norm": 0.356952256149441, + "learning_rate": 3.677966101694915e-05, + "loss": -0.0154, + "num_tokens": 1397320.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 217 + }, + { + "clip_ratio": 0.11051050573587418, + "epoch": 0.1847457627118644, + "grad_norm": 0.1542677635762948, + "learning_rate": 3.6949152542372886e-05, + "loss": -0.019, + "step": 218 + }, + { + "clip_ratio": 0.18382969498634338, + "epoch": 0.18559322033898304, + "grad_norm": 0.13966519767464722, + "learning_rate": 3.711864406779661e-05, + "loss": -0.0211, + "step": 219 + }, + { + "clip_ratio": 0.27009809017181396, + "epoch": 0.1864406779661017, + "grad_norm": 0.08132731257822706, + "learning_rate": 3.728813559322034e-05, + "loss": -0.0238, + "step": 220 + }, + { + "clip_ratio": 0.006028716918081045, + "completion_length": 83.26786041259766, + "epoch": 0.18728813559322033, + "grad_norm": 0.0, + "learning_rate": 3.745762711864407e-05, + "loss": 0.0, + "num_tokens": 1409935.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 221 + }, + { + "clip_ratio": 0.021660711616277695, + "epoch": 0.188135593220339, + "grad_norm": 0.0, + "learning_rate": 3.76271186440678e-05, + "loss": 0.0, 
+ "step": 222 + }, + { + "clip_ratio": 0.06699295341968536, + "epoch": 0.18898305084745762, + "grad_norm": 0.0, + "learning_rate": 3.779661016949153e-05, + "loss": 0.0, + "step": 223 + }, + { + "clip_ratio": 0.1347362995147705, + "epoch": 0.18983050847457628, + "grad_norm": 0.0, + "learning_rate": 3.796610169491526e-05, + "loss": 0.0, + "step": 224 + }, + { + "clip_ratio": 0.0006836191168986261, + "completion_length": 97.92857360839844, + "epoch": 0.1906779661016949, + "grad_norm": 0.05873233342660551, + "learning_rate": 3.813559322033899e-05, + "loss": 0.0388, + "num_tokens": 1422307.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 225 + }, + { + "clip_ratio": 0.0010315729305148125, + "epoch": 0.19152542372881357, + "grad_norm": 0.05937392738616397, + "learning_rate": 3.8305084745762714e-05, + "loss": 0.0386, + "step": 226 + }, + { + "clip_ratio": 0.01008252426981926, + "epoch": 0.1923728813559322, + "grad_norm": 0.0544038037479039, + "learning_rate": 3.847457627118644e-05, + "loss": 0.0371, + "step": 227 + }, + { + "clip_ratio": 0.0420512929558754, + "epoch": 0.19322033898305085, + "grad_norm": 0.047388133840752925, + "learning_rate": 3.8644067796610175e-05, + "loss": 0.0356, + "step": 228 + }, + { + "clip_ratio": 0.003829076187685132, + "completion_length": 48.48214340209961, + "epoch": 0.19406779661016949, + "grad_norm": 0.24338559301731436, + "learning_rate": 3.88135593220339e-05, + "loss": -0.0201, + "num_tokens": 1432182.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 229 + }, + { + "clip_ratio": 0.041657134890556335, + "epoch": 0.19491525423728814, + "grad_norm": 0.15635724094524717, + "learning_rate": 3.898305084745763e-05, + "loss": -0.026, + "step": 230 + }, + { + "clip_ratio": 0.16935327649116516, + "epoch": 0.19576271186440677, + "grad_norm": 0.11486942308015832, + "learning_rate": 3.9152542372881355e-05, + "loss": -0.0295, + "step": 231 + }, + { + "clip_ratio": 0.22958868741989136, + "epoch": 0.19661016949152543, + "grad_norm": 0.10892713241904037, + "learning_rate": 3.932203389830509e-05, + "loss": -0.0314, + "step": 232 + }, + { + "clip_ratio": 0.003605353645980358, + "completion_length": 66.10714721679688, + "epoch": 0.19745762711864406, + "grad_norm": 0.24973476558992524, + "learning_rate": 3.9491525423728816e-05, + "loss": -0.0118, + "num_tokens": 1443140.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 233 + }, + { + "clip_ratio": 0.029195427894592285, + "epoch": 0.19830508474576272, + "grad_norm": 0.1595699714332021, + "learning_rate": 3.966101694915255e-05, + "loss": -0.0189, + "step": 234 + }, + { + "clip_ratio": 0.10283487290143967, + "epoch": 0.19915254237288135, + "grad_norm": 0.11474727019285232, + "learning_rate": 3.9830508474576276e-05, + "loss": -0.0247, + "step": 235 + }, + { + "clip_ratio": 0.15862122178077698, + "epoch": 0.2, + "grad_norm": 0.10043744347803148, + "learning_rate": 4e-05, + "loss": -0.029, + "step": 236 + }, + { + "clip_ratio": 0.0013605443527922034, + 
"completion_length": 74.55357360839844, + "epoch": 0.20084745762711864, + "grad_norm": 0.13987954732136554, + "learning_rate": 3.9981167608286254e-05, + "loss": -0.0433, + "num_tokens": 1454515.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 237 + }, + { + "clip_ratio": 0.010904515162110329, + "epoch": 0.2016949152542373, + "grad_norm": 0.10644135758363778, + "learning_rate": 3.9962335216572505e-05, + "loss": -0.0464, + "step": 238 + }, + { + "clip_ratio": 0.05173995718359947, + "epoch": 0.20254237288135593, + "grad_norm": 0.08987160994189367, + "learning_rate": 3.994350282485876e-05, + "loss": -0.0494, + "step": 239 + }, + { + "clip_ratio": 0.10260221362113953, + "epoch": 0.2033898305084746, + "grad_norm": 0.07087528775663905, + "learning_rate": 3.9924670433145014e-05, + "loss": -0.0523, + "step": 240 + }, + { + "clip_ratio": 0.0006479613948613405, + "completion_length": 73.28572082519531, + "epoch": 0.20423728813559322, + "grad_norm": 0.3445626306759668, + "learning_rate": 3.9905838041431265e-05, + "loss": 0.0365, + "num_tokens": 1466203.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 241 + }, + { + "clip_ratio": 0.061018019914627075, + "epoch": 0.20508474576271185, + "grad_norm": 0.17586034661375718, + "learning_rate": 3.9887005649717516e-05, + "loss": 0.0285, + "step": 242 + 
}, + { + "clip_ratio": 0.21443673968315125, + "epoch": 0.2059322033898305, + "grad_norm": 0.16852265377216533, + "learning_rate": 3.986817325800377e-05, + "loss": 0.0245, + "step": 243 + }, + { + "clip_ratio": 0.3032749891281128, + "epoch": 0.20677966101694914, + "grad_norm": 0.16538030606379006, + "learning_rate": 3.984934086629002e-05, + "loss": 0.0219, + "step": 244 + }, + { + "clip_ratio": 0.001159251551143825, + "completion_length": 80.375, + "epoch": 0.2076271186440678, + "grad_norm": 0.17416776142167675, + "learning_rate": 3.9830508474576276e-05, + "loss": -0.0293, + "num_tokens": 1477712.0, + "reward": 0.8928571939468384, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 245 + }, + { + "clip_ratio": 0.018606197088956833, + "epoch": 0.20847457627118643, + "grad_norm": 0.13288705840160372, + "learning_rate": 3.981167608286253e-05, + "loss": -0.0342, + "step": 246 + }, + { + "clip_ratio": 0.07409250736236572, + "epoch": 0.2093220338983051, + "grad_norm": 0.11062194988477761, + "learning_rate": 3.979284369114878e-05, + "loss": -0.0387, + "step": 247 + }, + { + "clip_ratio": 0.13684464991092682, + "epoch": 0.21016949152542372, + "grad_norm": 0.09630587836377022, + "learning_rate": 3.9774011299435036e-05, + "loss": -0.0423, + "step": 248 + }, + { + "clip_ratio": 0.0009059179574251175, + "completion_length": 55.892860412597656, + "epoch": 0.21101694915254238, + "grad_norm": 0.30277389882138056, + "learning_rate": 3.975517890772128e-05, + "loss": -0.0232, + "num_tokens": 1489946.0, + "reward": 0.8214285969734192, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": 0.8214285969734192, + "rewards/check_winston_local_func/std": 
0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 249 + }, + { + "clip_ratio": 0.05273974686861038, + "epoch": 0.211864406779661, + "grad_norm": 0.20551737732816863, + "learning_rate": 3.973634651600754e-05, + "loss": -0.0352, + "step": 250 + }, + { + "clip_ratio": 0.15495876967906952, + "epoch": 0.21271186440677967, + "grad_norm": 0.23192855972985502, + "learning_rate": 3.971751412429379e-05, + "loss": -0.0428, + "step": 251 + }, + { + "clip_ratio": 0.17651182413101196, + "epoch": 0.2135593220338983, + "grad_norm": 0.153802982923592, + "learning_rate": 3.969868173258004e-05, + "loss": -0.0503, + "step": 252 + }, + { + "clip_ratio": 0.001365313190035522, + "completion_length": 74.5, + "epoch": 0.21440677966101696, + "grad_norm": 0.1294886128912843, + "learning_rate": 3.967984934086629e-05, + "loss": -0.0165, + "num_tokens": 1501150.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 253 + }, + { + "clip_ratio": 0.029109954833984375, + "epoch": 0.21525423728813559, + "grad_norm": 0.07796443960667383, + "learning_rate": 3.966101694915255e-05, + "loss": -0.0187, + "step": 254 + }, + { + "clip_ratio": 0.1373310536146164, + "epoch": 0.21610169491525424, + "grad_norm": 0.0719203190228422, + "learning_rate": 3.9642184557438794e-05, + "loss": -0.0211, + "step": 255 + }, + { + "clip_ratio": 0.24434244632720947, + "epoch": 0.21694915254237288, + "grad_norm": 0.0766668656235949, + "learning_rate": 3.962335216572505e-05, + "loss": -0.023, + "step": 256 + }, + { + 
"clip_ratio": 0.0014079277170822024, + "completion_length": 76.76786041259766, + "epoch": 0.21779661016949153, + "grad_norm": 0.14896557612902658, + "learning_rate": 3.96045197740113e-05, + "loss": -0.048, + "num_tokens": 1513457.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 257 + }, + { + "clip_ratio": 0.008702627383172512, + "epoch": 0.21864406779661016, + "grad_norm": 0.12790944068712845, + "learning_rate": 3.9585687382297554e-05, + "loss": -0.0515, + "step": 258 + }, + { + "clip_ratio": 0.05537901073694229, + "epoch": 0.21949152542372882, + "grad_norm": 0.0901106115692995, + "learning_rate": 3.956685499058381e-05, + "loss": -0.0559, + "step": 259 + }, + { + "clip_ratio": 0.12627661228179932, + "epoch": 0.22033898305084745, + "grad_norm": 0.08842019141814955, + "learning_rate": 3.954802259887006e-05, + "loss": -0.0589, + "step": 260 + }, + { + "clip_ratio": 0.001680672401562333, + "completion_length": 68.46428680419922, + "epoch": 0.2211864406779661, + "grad_norm": 0.45455295411779023, + "learning_rate": 3.9529190207156314e-05, + "loss": 0.0253, + "num_tokens": 1523675.0, + "reward": 0.785714328289032, + "reward_std": 0.35475122928619385, + "rewards/check_winston_local_func/mean": 0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241878271102905, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 261 + }, + { + "clip_ratio": 0.08457090705633163, + "epoch": 0.22203389830508474, + "grad_norm": 0.2750192097608069, + "learning_rate": 3.9510357815442565e-05, + 
"loss": 0.0122, + "step": 262 + }, + { + "clip_ratio": 0.14618617296218872, + "epoch": 0.2228813559322034, + "grad_norm": 0.21776056884947845, + "learning_rate": 3.9491525423728816e-05, + "loss": 0.0035, + "step": 263 + }, + { + "clip_ratio": 0.16797243058681488, + "epoch": 0.22372881355932203, + "grad_norm": 0.1552963639704198, + "learning_rate": 3.947269303201507e-05, + "loss": -0.0027, + "step": 264 + }, + { + "clip_ratio": 0.0041149333119392395, + "completion_length": 92.05357360839844, + "epoch": 0.2245762711864407, + "grad_norm": 0.0, + "learning_rate": 3.9453860640301325e-05, + "loss": 0.0, + "num_tokens": 1537926.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 265 + }, + { + "clip_ratio": 0.015582824125885963, + "epoch": 0.22542372881355932, + "grad_norm": 0.0, + "learning_rate": 3.943502824858757e-05, + "loss": 0.0, + "step": 266 + }, + { + "clip_ratio": 0.05549276992678642, + "epoch": 0.22627118644067798, + "grad_norm": 0.0, + "learning_rate": 3.941619585687383e-05, + "loss": 0.0, + "step": 267 + }, + { + "clip_ratio": 0.10348478704690933, + "epoch": 0.2271186440677966, + "grad_norm": 0.0, + "learning_rate": 3.939736346516008e-05, + "loss": 0.0, + "step": 268 + }, + { + "clip_ratio": 0.0024110758677124977, + "completion_length": 132.5357208251953, + "epoch": 0.22796610169491524, + "grad_norm": 0.09866009376925343, + "learning_rate": 3.937853107344633e-05, + "loss": -0.0252, + "num_tokens": 1552892.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + 
"rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 269 + }, + { + "clip_ratio": 0.009020349942147732, + "epoch": 0.2288135593220339, + "grad_norm": 0.08611769992817986, + "learning_rate": 3.935969868173259e-05, + "loss": -0.0267, + "step": 270 + }, + { + "clip_ratio": 0.03130246326327324, + "epoch": 0.22966101694915253, + "grad_norm": 0.08283957691220468, + "learning_rate": 3.934086629001884e-05, + "loss": -0.0288, + "step": 271 + }, + { + "clip_ratio": 0.06420135498046875, + "epoch": 0.2305084745762712, + "grad_norm": 0.07319871503015539, + "learning_rate": 3.932203389830509e-05, + "loss": -0.0314, + "step": 272 + }, + { + "clip_ratio": 0.0030047716572880745, + "completion_length": 85.9464340209961, + "epoch": 0.23135593220338982, + "grad_norm": 0.42479217252605955, + "learning_rate": 3.930320150659134e-05, + "loss": 0.0151, + "num_tokens": 1564737.0, + "reward": 0.8571429252624512, + "reward_std": 0.3342905342578888, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 273 + }, + { + "clip_ratio": 0.046567775309085846, + "epoch": 0.23220338983050848, + "grad_norm": 0.25875493543994865, + "learning_rate": 3.928436911487759e-05, + "loss": 0.0039, + "step": 274 + }, + { + "clip_ratio": 0.11683137714862823, + "epoch": 0.2330508474576271, + "grad_norm": 0.15568587648106266, + "learning_rate": 3.926553672316384e-05, + "loss": -0.0039, + "step": 275 + }, + { + "clip_ratio": 0.1598564237356186, + "epoch": 0.23389830508474577, + "grad_norm": 0.12141989924883649, + "learning_rate": 3.92467043314501e-05, + "loss": -0.0076, + "step": 276 + }, + { + "clip_ratio": 0.001374253653921187, + "completion_length": 
122.96429443359375, + "epoch": 0.2347457627118644, + "grad_norm": 0.18782615643703865, + "learning_rate": 3.922787193973635e-05, + "loss": -0.026, + "num_tokens": 1579727.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 277 + }, + { + "clip_ratio": 0.017132315784692764, + "epoch": 0.23559322033898306, + "grad_norm": 0.12401604739808049, + "learning_rate": 3.92090395480226e-05, + "loss": -0.031, + "step": 278 + }, + { + "clip_ratio": 0.05705662816762924, + "epoch": 0.2364406779661017, + "grad_norm": 0.0762353013620226, + "learning_rate": 3.919020715630885e-05, + "loss": -0.034, + "step": 279 + }, + { + "clip_ratio": 0.09824671596288681, + "epoch": 0.23728813559322035, + "grad_norm": 0.07414316824181627, + "learning_rate": 3.9171374764595104e-05, + "loss": -0.0361, + "step": 280 + }, + { + "clip_ratio": 0.006891847122460604, + "completion_length": 111.14286041259766, + "epoch": 0.23813559322033898, + "grad_norm": 0.44887370177976565, + "learning_rate": 3.9152542372881355e-05, + "loss": -0.0147, + "num_tokens": 1592855.0, + "reward": 0.7500000596046448, + "reward_std": 0.637336254119873, + "rewards/check_winston_local_func/mean": 0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 281 + }, + { + "clip_ratio": 0.0862836092710495, + "epoch": 0.23898305084745763, + "grad_norm": 0.2905997622704694, + "learning_rate": 3.913370998116761e-05, + "loss": -0.0255, + "step": 282 + }, + { + "clip_ratio": 0.13213881850242615, 
+ "epoch": 0.23983050847457626, + "grad_norm": 0.20046065755709935, + "learning_rate": 3.9114877589453864e-05, + "loss": -0.036, + "step": 283 + }, + { + "clip_ratio": 0.20514217019081116, + "epoch": 0.24067796610169492, + "grad_norm": 0.17822083347245274, + "learning_rate": 3.9096045197740115e-05, + "loss": -0.0417, + "step": 284 + }, + { + "clip_ratio": 0.0007382220355793834, + "completion_length": 90.14286041259766, + "epoch": 0.24152542372881355, + "grad_norm": 0.05844943977633127, + "learning_rate": 3.907721280602637e-05, + "loss": -0.0145, + "num_tokens": 1605287.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 285 + }, + { + "clip_ratio": 0.006259975954890251, + "epoch": 0.2423728813559322, + "grad_norm": 0.04883518588447698, + "learning_rate": 3.905838041431262e-05, + "loss": -0.015, + "step": 286 + }, + { + "clip_ratio": 0.023042459040880203, + "epoch": 0.24322033898305084, + "grad_norm": 0.04225419018938037, + "learning_rate": 3.9039548022598875e-05, + "loss": -0.0156, + "step": 287 + }, + { + "clip_ratio": 0.04386242851614952, + "epoch": 0.2440677966101695, + "grad_norm": 0.03866602121110847, + "learning_rate": 3.9020715630885127e-05, + "loss": -0.0163, + "step": 288 + }, + { + "clip_ratio": 0.0029555519577115774, + "completion_length": 144.98214721679688, + "epoch": 0.24491525423728813, + "grad_norm": 0.1262690850004453, + "learning_rate": 3.900188323917138e-05, + "loss": -0.0343, + "num_tokens": 1620742.0, + "reward": 0.8571429252624512, + "reward_std": 0.4040610194206238, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 289 + }, + { + "clip_ratio": 0.0132750254124403, + "epoch": 0.2457627118644068, + "grad_norm": 0.10191416605166659, + "learning_rate": 3.898305084745763e-05, + "loss": -0.0377, + "step": 290 + }, + { + "clip_ratio": 0.043000176548957825, + "epoch": 0.24661016949152542, + "grad_norm": 0.08414775760035112, + "learning_rate": 3.896421845574388e-05, + "loss": -0.0405, + "step": 291 + }, + { + "clip_ratio": 0.07677298784255981, + "epoch": 0.24745762711864408, + "grad_norm": 0.07673330413564883, + "learning_rate": 3.894538606403013e-05, + "loss": -0.0436, + "step": 292 + }, + { + "clip_ratio": 0.0007677033427171409, + "completion_length": 314.64288330078125, + "epoch": 0.2483050847457627, + "grad_norm": 0.05727067166697756, + "learning_rate": 3.892655367231639e-05, + "loss": 0.0061, + "num_tokens": 1645538.0, + "reward": 0.8571429252624512, + "reward_std": 0.15272071957588196, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 293 + }, + { + "clip_ratio": 0.000989561784081161, + "epoch": 0.24915254237288137, + "grad_norm": 0.05658381403070837, + "learning_rate": 3.890772128060264e-05, + "loss": 0.0055, + "step": 294 + }, + { + "clip_ratio": 0.002105026040226221, + "epoch": 0.25, + "grad_norm": 0.05358318750720369, + "learning_rate": 3.888888888888889e-05, + "loss": 0.0045, + "step": 295 + }, + { + "clip_ratio": 0.008737047202885151, + "epoch": 0.25084745762711863, + "grad_norm": 0.04703537375755522, + "learning_rate": 3.887005649717515e-05, + "loss": 0.003, + "step": 296 + }, + { + "clip_ratio": 
0.001839210744947195, + "completion_length": 134.08929443359375, + "epoch": 0.25169491525423726, + "grad_norm": 0.13649134617830305, + "learning_rate": 3.885122410546139e-05, + "loss": -0.0194, + "num_tokens": 1660599.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 297 + }, + { + "clip_ratio": 0.009821072220802307, + "epoch": 0.25254237288135595, + "grad_norm": 0.0691252012642643, + "learning_rate": 3.883239171374765e-05, + "loss": -0.0207, + "step": 298 + }, + { + "clip_ratio": 0.027830438688397408, + "epoch": 0.2533898305084746, + "grad_norm": 0.04974246695392892, + "learning_rate": 3.88135593220339e-05, + "loss": -0.0215, + "step": 299 + }, + { + "clip_ratio": 0.05817332863807678, + "epoch": 0.2542372881355932, + "grad_norm": 0.04524622630022836, + "learning_rate": 3.879472693032015e-05, + "loss": -0.022, + "step": 300 + } + ], + "logging_steps": 1, + "max_steps": 2360, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1261db78e0a310bb2e0bd6333e2741bd2c4391ea --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:dd3bd8ba987ac3c91f8253f49fc4f0e162f8c1db67922f9a6a6a7ad4757383ff +size 7544 diff --git a/checkpoint-300/zero_to_fp32.py b/checkpoint-300/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/checkpoint-300/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # a memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model``: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a0f59b7b91d61179514d6e990a01e588f1af99b0 --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.0 \ No newline at end of file diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..29e0b3a4b795e316b1a7ba9b7dc790302a9d6e0f --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "gate_proj", + "o_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": 
null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff6102b44d29c911f37409698552bd2725d64a55 --- /dev/null +++ b/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a04362aed174c721afd8b7d99093331a930bf741a8c54c4d1fa6d73552fa36a +size 167832688 diff --git a/checkpoint-400/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-400/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d87848d1e23055f41d532073e885c0be75854e60 --- /dev/null +++ b/checkpoint-400/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5bf6cf5461f4631d459a9e899dff23480876b6978f4b2e9e0a3ab779890027f +size 72284496 diff --git a/checkpoint-400/global_step400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/checkpoint-400/global_step400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08c2a66237250734a3144b79bb63d7e400a2052f --- /dev/null +++ b/checkpoint-400/global_step400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f78675838df58378d15209fc32f1504b28ede4df46c358853e17f1630a535c40 +size 72284496 diff --git a/checkpoint-400/global_step400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/checkpoint-400/global_step400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d5a622fd33631f7d347f7ac545fa3420c42f568 --- /dev/null +++ b/checkpoint-400/global_step400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1d32f662d540b52b1c08375473aa7b9b00d15a92ec6310fb031b5f07309b633b +size 72284496 diff --git a/checkpoint-400/global_step400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/checkpoint-400/global_step400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..daeda1bbfd75acacf1154fbd5eceff4d005c1425 --- /dev/null +++ b/checkpoint-400/global_step400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183abed0476949d4fcbd45b6796d4a57ba79f4d0ae2342a55a4e7e4ece3d10bc +size 72284496 diff --git a/checkpoint-400/global_step400/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/checkpoint-400/global_step400/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b0188c379c6756dea40ae7bd583486be141614b --- /dev/null +++ b/checkpoint-400/global_step400/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7e67216ad7fa187c3db2a7c5163f52261f40164b2a763c29ba622cf3dca048 +size 72284496 diff --git a/checkpoint-400/global_step400/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/checkpoint-400/global_step400/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1430c1e3c316da7b216d32429068c32892ad6c51 --- /dev/null +++ b/checkpoint-400/global_step400/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66f55905fbe0dc2004b0730631c8f5addc09d3e01cd538182f2c90068d0636a2 +size 72284496 diff --git a/checkpoint-400/global_step400/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/checkpoint-400/global_step400/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b9431e8bb8b11cc997771daa11f576e8d5d250d --- /dev/null +++ 
b/checkpoint-400/global_step400/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fde5c2055bf784a44a2c782a25474a243095e31ca6c2ebb3130ed22a9f2f321 +size 72284496 diff --git a/checkpoint-400/global_step400/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-400/global_step400/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..945d05c581ea1116fd829ce711cb0323961ba986 --- /dev/null +++ b/checkpoint-400/global_step400/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c94d41ad5e024e89c7e17f8fba0c6f99d3fbb940edb33138e8088722ae8192c +size 443182 diff --git a/checkpoint-400/global_step400/zero_pp_rank_1_mp_rank_00_model_states.pt b/checkpoint-400/global_step400/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf5529ab8fa0b5761f9055327ae59783c32f317c --- /dev/null +++ b/checkpoint-400/global_step400/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9ecc3082d41bd112d5b3c492cfeb2f6ceded32b31bfb817a264fa0b86f8aac6 +size 443182 diff --git a/checkpoint-400/global_step400/zero_pp_rank_2_mp_rank_00_model_states.pt b/checkpoint-400/global_step400/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e45e96c89112e40b3ce42999b10114eef8e5ed8 --- /dev/null +++ b/checkpoint-400/global_step400/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616bb9000f92712dd7220faf3763111b8800bd9981805d42bb7475368929f718 +size 443182 diff --git a/checkpoint-400/global_step400/zero_pp_rank_3_mp_rank_00_model_states.pt b/checkpoint-400/global_step400/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..ac86f369b2cbc300a926dc1a8aa609f8758b047e --- /dev/null +++ b/checkpoint-400/global_step400/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f157a11938e3b5e4daaf6f74851df7408711d1e73e58d467d026b7e58c396d2a +size 443182 diff --git a/checkpoint-400/global_step400/zero_pp_rank_4_mp_rank_00_model_states.pt b/checkpoint-400/global_step400/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9356f7e2a1fdd2438241b74188ccb1ccc88d96a6 --- /dev/null +++ b/checkpoint-400/global_step400/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25710d0860ce99ed3ffd2db9cc26948a3af43b187223905fc50d4ec7109cac6e +size 443182 diff --git a/checkpoint-400/global_step400/zero_pp_rank_5_mp_rank_00_model_states.pt b/checkpoint-400/global_step400/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c93b04122a5ab8a8cc339087feea57e36833f97 --- /dev/null +++ b/checkpoint-400/global_step400/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f7f01b6a08806bcef2833a905559bd18dda577ef9dcdb2fa5649a35fe68ede +size 443182 diff --git a/checkpoint-400/global_step400/zero_pp_rank_6_mp_rank_00_model_states.pt b/checkpoint-400/global_step400/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ca9b2010453907de21f264a4efa6ce5fbecfe58 --- /dev/null +++ b/checkpoint-400/global_step400/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a4dd2ca87f4148a3af4eb9db62a904ea2a758192fc47ce69ff6ecfeae4d1145 +size 443182 diff --git a/checkpoint-400/latest b/checkpoint-400/latest new file mode 100644 index 
0000000000000000000000000000000000000000..e5bdf58d4f29d34e909da25905fad376f73e7c29 --- /dev/null +++ b/checkpoint-400/latest @@ -0,0 +1 @@ +global_step400 \ No newline at end of file diff --git a/checkpoint-400/rng_state_0.pth b/checkpoint-400/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..f8f6300e8d22a4361b6d5045128140c0260c9730 --- /dev/null +++ b/checkpoint-400/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567913fff2e6621037eec5bacab885cb9d04bca92b027f81161bf569ce7c88c4 +size 15728 diff --git a/checkpoint-400/rng_state_1.pth b/checkpoint-400/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..f67ec3a10105fa65573dd7e233035883f6ea83e1 --- /dev/null +++ b/checkpoint-400/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:424c85510aef3b1a8b72e553fb668e429b5bf673310e467579ffeef1f7567dbe +size 15728 diff --git a/checkpoint-400/rng_state_2.pth b/checkpoint-400/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d89cdd5a77e44b8e63dc5989c3553dee9f1f4d7 --- /dev/null +++ b/checkpoint-400/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d90b5e0b01da47f3c51da53c3dcac2048c3cc65a459adda82b33413cc8067573 +size 15728 diff --git a/checkpoint-400/rng_state_3.pth b/checkpoint-400/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..c1eea5f7d961060665e70f6dd20acf2dd8b363cc --- /dev/null +++ b/checkpoint-400/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03da679d9c0a3680f147df486228c81bba1a908a3baa711d4573aca1558b5aa9 +size 15792 diff --git a/checkpoint-400/rng_state_4.pth b/checkpoint-400/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..0730a4ab659e26056a5a0e4882261353c027f371 --- /dev/null +++ b/checkpoint-400/rng_state_4.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:25f934d6bc7cf6de85b9351177c464d5b86677a36e66d8e9511ce4643ea28bb8 +size 15728 diff --git a/checkpoint-400/rng_state_5.pth b/checkpoint-400/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..81a023b996dc045c39f09b6ee35111037735e390 --- /dev/null +++ b/checkpoint-400/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e963cc76a8fa52192cfc3d0978624725500eba05eb29c2b629dfba7bee641ce +size 15728 diff --git a/checkpoint-400/rng_state_6.pth b/checkpoint-400/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..8111a5a46c5950682baed2e9f460e72163e0231a --- /dev/null +++ b/checkpoint-400/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7bf6c9c971614bf4b84806353365d3e025b6c471988c101cd85ce97250235ad +size 15728 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..85143dde6f4b63375ac5049b9446571a11d700aa --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae924865dbada745e1477fef5e182a11c548b4262c6069fbb1813bcf2d1d1c4a +size 1064 diff --git a/checkpoint-400/special_tokens_map.json b/checkpoint-400/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-400/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-400/tokenizer.json b/checkpoint-400/tokenizer.json new file mode 100644 index 
0000000000000000000000000000000000000000..92cc72bfcc2faff4ba96750b21c7d2e3cb92d25c --- /dev/null +++ b/checkpoint-400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ff5472d095ccd9332d9e723153d7bc7226cb6be9c1bffda738b5ba2e71bf26 +size 17210084 diff --git a/checkpoint-400/tokenizer_config.json b/checkpoint-400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ae1bc63bd6e5ca8a863628311061c143679ff93 --- /dev/null +++ b/checkpoint-400/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true 
+ }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": 
"<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": 
"<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": 
"<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set 
tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + 
"extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..524336b1256f15909a1c156e94dd56908826c402 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,4234 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3389830508474576, + "eval_steps": 500, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio": 0.0, + "completion_length": 396.3571472167969, + "epoch": 0.000847457627118644, + "grad_norm": 0.028597827622128653, + "learning_rate": 1.6949152542372883e-07, + "loss": 0.0096, + "num_tokens": 29860.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 1 + }, + { + "clip_ratio": 0.0, + "epoch": 0.001694915254237288, + "grad_norm": 0.0283861264343528, + "learning_rate": 3.3898305084745766e-07, + "loss": 0.0096, + "step": 2 + }, + { + "clip_ratio": 0.0005210353410802782, + "epoch": 0.002542372881355932, + "grad_norm": 0.024416377206652233, + "learning_rate": 5.084745762711865e-07, + "loss": 0.0095, + "step": 3 + }, + { + "clip_ratio": 0.0003804714942816645, + "epoch": 0.003389830508474576, + "grad_norm": 0.024954590093213137, + "learning_rate": 6.779661016949153e-07, + "loss": 0.0096, + "step": 4 + }, + { + "clip_ratio": 0.00028131139697507024, + "completion_length": 
477.6250305175781, + "epoch": 0.00423728813559322, + "grad_norm": 0.0, + "learning_rate": 8.474576271186441e-07, + "loss": 0.0, + "num_tokens": 64207.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 5 + }, + { + "clip_ratio": 0.00026464727125130594, + "epoch": 0.005084745762711864, + "grad_norm": 0.0, + "learning_rate": 1.016949152542373e-06, + "loss": 0.0, + "step": 6 + }, + { + "clip_ratio": 0.0003427764168009162, + "epoch": 0.005932203389830509, + "grad_norm": 0.0, + "learning_rate": 1.186440677966102e-06, + "loss": 0.0, + "step": 7 + }, + { + "clip_ratio": 0.0003427252813708037, + "epoch": 0.006779661016949152, + "grad_norm": 0.0, + "learning_rate": 1.3559322033898307e-06, + "loss": 0.0, + "step": 8 + }, + { + "clip_ratio": 0.0003535364812705666, + "completion_length": 503.14288330078125, + "epoch": 0.007627118644067797, + "grad_norm": 0.0, + "learning_rate": 1.5254237288135596e-06, + "loss": 0.0, + "num_tokens": 99207.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 9 + }, + { + "clip_ratio": 0.00017467686848249286, + "epoch": 0.00847457627118644, + "grad_norm": 0.0, + "learning_rate": 1.6949152542372882e-06, + "loss": 0.0, + "step": 10 + }, + { + "clip_ratio": 0.0002140275464626029, + "epoch": 0.009322033898305085, + "grad_norm": 0.0, + "learning_rate": 1.8644067796610171e-06, + "loss": 0.0, + "step": 11 + }, + { + "clip_ratio": 0.00035844597732648253, + "epoch": 0.010169491525423728, + 
"grad_norm": 0.0, + "learning_rate": 2.033898305084746e-06, + "loss": 0.0, + "step": 12 + }, + { + "clip_ratio": 0.00035540881799533963, + "completion_length": 471.83929443359375, + "epoch": 0.011016949152542373, + "grad_norm": 0.0, + "learning_rate": 2.203389830508475e-06, + "loss": 0.0, + "num_tokens": 132582.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 13 + }, + { + "clip_ratio": 0.0002507771132513881, + "epoch": 0.011864406779661017, + "grad_norm": 0.0, + "learning_rate": 2.372881355932204e-06, + "loss": 0.0, + "step": 14 + }, + { + "clip_ratio": 0.0001079499488696456, + "epoch": 0.012711864406779662, + "grad_norm": 0.0, + "learning_rate": 2.5423728813559323e-06, + "loss": 0.0, + "step": 15 + }, + { + "clip_ratio": 0.00021258163906168193, + "epoch": 0.013559322033898305, + "grad_norm": 0.0, + "learning_rate": 2.7118644067796613e-06, + "loss": 0.0, + "step": 16 + }, + { + "clip_ratio": 0.000322989042615518, + "completion_length": 387.14288330078125, + "epoch": 0.01440677966101695, + "grad_norm": 0.016452011518392446, + "learning_rate": 2.8813559322033903e-06, + "loss": 0.0658, + "num_tokens": 161406.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 17 + }, + { + "clip_ratio": 0.00034964055521413684, + "epoch": 0.015254237288135594, + "grad_norm": 0.017719451531367687, + "learning_rate": 3.0508474576271192e-06, + "loss": 0.0657, + 
"step": 18 + }, + { + "clip_ratio": 0.0004103984101675451, + "epoch": 0.016101694915254237, + "grad_norm": 0.016469439956852048, + "learning_rate": 3.2203389830508473e-06, + "loss": 0.0657, + "step": 19 + }, + { + "clip_ratio": 0.0003408819029573351, + "epoch": 0.01694915254237288, + "grad_norm": 0.017326107824003897, + "learning_rate": 3.3898305084745763e-06, + "loss": 0.0657, + "step": 20 + }, + { + "clip_ratio": 0.00046000577276572585, + "completion_length": 481.732177734375, + "epoch": 0.017796610169491526, + "grad_norm": 0.0, + "learning_rate": 3.5593220338983053e-06, + "loss": 0.0, + "num_tokens": 195711.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 21 + }, + { + "clip_ratio": 0.00042848457815125585, + "epoch": 0.01864406779661017, + "grad_norm": 0.0, + "learning_rate": 3.7288135593220342e-06, + "loss": 0.0, + "step": 22 + }, + { + "clip_ratio": 0.0004297326668165624, + "epoch": 0.019491525423728815, + "grad_norm": 0.0, + "learning_rate": 3.898305084745763e-06, + "loss": 0.0, + "step": 23 + }, + { + "clip_ratio": 0.000281251355772838, + "epoch": 0.020338983050847456, + "grad_norm": 0.0, + "learning_rate": 4.067796610169492e-06, + "loss": 0.0, + "step": 24 + }, + { + "clip_ratio": 0.00017563004803378135, + "completion_length": 442.7500305175781, + "epoch": 0.0211864406779661, + "grad_norm": 0.11157048303951664, + "learning_rate": 4.23728813559322e-06, + "loss": 0.0104, + "num_tokens": 227185.0, + "reward": -0.8214285969734192, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 
0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 25 + }, + { + "clip_ratio": 0.00010569583537289873, + "epoch": 0.022033898305084745, + "grad_norm": 0.12213723346474271, + "learning_rate": 4.40677966101695e-06, + "loss": 0.0104, + "step": 26 + }, + { + "clip_ratio": 0.0005364188691601157, + "epoch": 0.02288135593220339, + "grad_norm": 0.11319483991164629, + "learning_rate": 4.576271186440678e-06, + "loss": 0.0106, + "step": 27 + }, + { + "clip_ratio": 0.0010358322178944945, + "epoch": 0.023728813559322035, + "grad_norm": 0.10119136649790463, + "learning_rate": 4.745762711864408e-06, + "loss": 0.0101, + "step": 28 + }, + { + "clip_ratio": 0.0002854761842172593, + "completion_length": 420.51788330078125, + "epoch": 0.02457627118644068, + "grad_norm": 0.0, + "learning_rate": 4.915254237288136e-06, + "loss": 0.0, + "num_tokens": 257614.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 29 + }, + { + "clip_ratio": 0.00021371705224737525, + "epoch": 0.025423728813559324, + "grad_norm": 0.0, + "learning_rate": 5.084745762711865e-06, + "loss": 0.0, + "step": 30 + }, + { + "clip_ratio": 0.00016422003682237118, + "epoch": 0.026271186440677965, + "grad_norm": 0.0, + "learning_rate": 5.254237288135594e-06, + "loss": 0.0, + "step": 31 + }, + { + "clip_ratio": 0.000256577244726941, + "epoch": 0.02711864406779661, + "grad_norm": 0.0, + "learning_rate": 5.423728813559323e-06, + "loss": 0.0, + "step": 32 + }, + { + "clip_ratio": 0.00045646229409612715, + "completion_length": 465.1250305175781, + "epoch": 0.027966101694915254, + "grad_norm": 0.017873238036622066, + "learning_rate": 5.593220338983051e-06, + "loss": 0.0246, + "num_tokens": 
290581.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 33 + }, + { + "clip_ratio": 0.0006314113852567971, + "epoch": 0.0288135593220339, + "grad_norm": 0.01732638271233714, + "learning_rate": 5.7627118644067805e-06, + "loss": 0.0247, + "step": 34 + }, + { + "clip_ratio": 0.00045800459338352084, + "epoch": 0.029661016949152543, + "grad_norm": 0.017593288926627842, + "learning_rate": 5.932203389830509e-06, + "loss": 0.0247, + "step": 35 + }, + { + "clip_ratio": 0.0004213759966660291, + "epoch": 0.030508474576271188, + "grad_norm": 0.017758527483606314, + "learning_rate": 6.1016949152542385e-06, + "loss": 0.0247, + "step": 36 + }, + { + "clip_ratio": 0.00027920620050281286, + "completion_length": 487.982177734375, + "epoch": 0.03135593220338983, + "grad_norm": 0.017492673426871806, + "learning_rate": 6.271186440677966e-06, + "loss": 0.0287, + "num_tokens": 325036.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 37 + }, + { + "clip_ratio": 0.0003654122701846063, + "epoch": 0.03220338983050847, + "grad_norm": 0.016942624524485753, + "learning_rate": 6.440677966101695e-06, + "loss": 0.0287, + "step": 38 + }, + { + "clip_ratio": 0.0002445173158776015, + "epoch": 0.03305084745762712, + "grad_norm": 0.017357366453315624, + "learning_rate": 6.610169491525424e-06, + "loss": 0.0287, + 
"step": 39 + }, + { + "clip_ratio": 0.00027939456049352884, + "epoch": 0.03389830508474576, + "grad_norm": 0.017497160548341977, + "learning_rate": 6.779661016949153e-06, + "loss": 0.0287, + "step": 40 + }, + { + "clip_ratio": 0.00030169120873324573, + "completion_length": 337.76788330078125, + "epoch": 0.03474576271186441, + "grad_norm": 0.013386997712677729, + "learning_rate": 6.949152542372882e-06, + "loss": 0.0194, + "num_tokens": 351879.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 41 + }, + { + "clip_ratio": 0.000481679366203025, + "epoch": 0.03559322033898305, + "grad_norm": 0.013534365829241167, + "learning_rate": 7.1186440677966106e-06, + "loss": 0.0194, + "step": 42 + }, + { + "clip_ratio": 0.0006071141688153148, + "epoch": 0.036440677966101696, + "grad_norm": 0.013688658779614732, + "learning_rate": 7.288135593220339e-06, + "loss": 0.0193, + "step": 43 + }, + { + "clip_ratio": 0.0005443710251711309, + "epoch": 0.03728813559322034, + "grad_norm": 0.013415623466192152, + "learning_rate": 7.4576271186440685e-06, + "loss": 0.0194, + "step": 44 + }, + { + "clip_ratio": 0.00027171947294846177, + "completion_length": 358.6964416503906, + "epoch": 0.038135593220338986, + "grad_norm": 0.0, + "learning_rate": 7.627118644067797e-06, + "loss": 0.0, + "num_tokens": 379414.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 45 + }, + { + 
"clip_ratio": 0.00027013494400307536, + "epoch": 0.03898305084745763, + "grad_norm": 0.0, + "learning_rate": 7.796610169491526e-06, + "loss": 0.0, + "step": 46 + }, + { + "clip_ratio": 0.00023684222833253443, + "epoch": 0.03983050847457627, + "grad_norm": 0.0, + "learning_rate": 7.966101694915255e-06, + "loss": 0.0, + "step": 47 + }, + { + "clip_ratio": 0.0004315820406191051, + "epoch": 0.04067796610169491, + "grad_norm": 0.0, + "learning_rate": 8.135593220338983e-06, + "loss": 0.0, + "step": 48 + }, + { + "clip_ratio": 0.00034640118246898055, + "completion_length": 392.46429443359375, + "epoch": 0.04152542372881356, + "grad_norm": 0.05155975490631469, + "learning_rate": 8.305084745762712e-06, + "loss": -0.023, + "num_tokens": 408424.0, + "reward": -0.8571429252624512, + "reward_std": 0.24888646602630615, + "rewards/check_winston_local_func/mean": -0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 49 + }, + { + "clip_ratio": 0.00034579477505758405, + "epoch": 0.0423728813559322, + "grad_norm": 0.051568368553185584, + "learning_rate": 8.47457627118644e-06, + "loss": -0.0233, + "step": 50 + }, + { + "clip_ratio": 0.0005872369511052966, + "epoch": 0.043220338983050846, + "grad_norm": 0.054569986775825835, + "learning_rate": 8.64406779661017e-06, + "loss": -0.0235, + "step": 51 + }, + { + "clip_ratio": 0.00048618926666677, + "epoch": 0.04406779661016949, + "grad_norm": 0.05573624590215382, + "learning_rate": 8.8135593220339e-06, + "loss": -0.0236, + "step": 52 + }, + { + "clip_ratio": 0.000333156727720052, + "completion_length": 485.7500305175781, + "epoch": 0.044915254237288135, + "grad_norm": 0.0, + "learning_rate": 8.983050847457628e-06, + "loss": 0.0, + "num_tokens": 442986.0, + "reward": -1.0, + "reward_std": 0.0, + 
"rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 53 + }, + { + "clip_ratio": 0.00042045177542604506, + "epoch": 0.04576271186440678, + "grad_norm": 0.0, + "learning_rate": 9.152542372881356e-06, + "loss": 0.0, + "step": 54 + }, + { + "clip_ratio": 0.00031678256345912814, + "epoch": 0.046610169491525424, + "grad_norm": 0.0, + "learning_rate": 9.322033898305085e-06, + "loss": 0.0, + "step": 55 + }, + { + "clip_ratio": 0.00010463170474395156, + "epoch": 0.04745762711864407, + "grad_norm": 0.0, + "learning_rate": 9.491525423728815e-06, + "loss": 0.0, + "step": 56 + }, + { + "clip_ratio": 0.0007074553286656737, + "completion_length": 428.3214416503906, + "epoch": 0.048305084745762714, + "grad_norm": 0.04153528214569023, + "learning_rate": 9.661016949152544e-06, + "loss": 0.0343, + "num_tokens": 473892.0, + "reward": -0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 57 + }, + { + "clip_ratio": 0.0004013319849036634, + "epoch": 0.04915254237288136, + "grad_norm": 0.04657277213309362, + "learning_rate": 9.830508474576272e-06, + "loss": 0.0342, + "step": 58 + }, + { + "clip_ratio": 0.00044179416727274656, + "epoch": 0.05, + "grad_norm": 0.045153415468062494, + "learning_rate": 1e-05, + "loss": 0.0343, + "step": 59 + }, + { + "clip_ratio": 0.0007794442353770137, + "epoch": 0.05084745762711865, + "grad_norm": 0.035363902861678634, + "learning_rate": 1.016949152542373e-05, + "loss": 0.0339, + "step": 60 + }, 
+ { + "clip_ratio": 0.00021712151647079736, + "completion_length": 299.8035888671875, + "epoch": 0.051694915254237285, + "grad_norm": 0.07205399219848665, + "learning_rate": 1.0338983050847458e-05, + "loss": 0.0477, + "num_tokens": 497465.0, + "reward": -0.785714328289032, + "reward_std": 0.28498074412345886, + "rewards/check_winston_local_func/mean": -0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241877675056458, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 61 + }, + { + "clip_ratio": 0.0002563712769187987, + "epoch": 0.05254237288135593, + "grad_norm": 0.07155354465871978, + "learning_rate": 1.0508474576271188e-05, + "loss": 0.0475, + "step": 62 + }, + { + "clip_ratio": 0.0001442718057660386, + "epoch": 0.053389830508474574, + "grad_norm": 0.07289445064494822, + "learning_rate": 1.0677966101694917e-05, + "loss": 0.0474, + "step": 63 + }, + { + "clip_ratio": 0.001116903149522841, + "epoch": 0.05423728813559322, + "grad_norm": 0.06596181254777028, + "learning_rate": 1.0847457627118645e-05, + "loss": 0.0468, + "step": 64 + }, + { + "clip_ratio": 0.00027901786961592734, + "completion_length": 480.4464416503906, + "epoch": 0.05508474576271186, + "grad_norm": 0.0, + "learning_rate": 1.1016949152542374e-05, + "loss": 0.0, + "num_tokens": 532266.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 65 + }, + { + "clip_ratio": 0.00037270825123414397, + "epoch": 0.05593220338983051, + "grad_norm": 0.0, + "learning_rate": 1.1186440677966102e-05, + "loss": 0.0, + "step": 66 + }, + { + "clip_ratio": 0.0006563978386111557, + 
"epoch": 0.05677966101694915, + "grad_norm": 0.0, + "learning_rate": 1.1355932203389833e-05, + "loss": 0.0, + "step": 67 + }, + { + "clip_ratio": 0.0008186621707864106, + "epoch": 0.0576271186440678, + "grad_norm": 0.0, + "learning_rate": 1.1525423728813561e-05, + "loss": 0.0, + "step": 68 + }, + { + "clip_ratio": 0.0005370522267185152, + "completion_length": 420.3214416503906, + "epoch": 0.05847457627118644, + "grad_norm": 0.0, + "learning_rate": 1.169491525423729e-05, + "loss": 0.0, + "num_tokens": 563380.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 69 + }, + { + "clip_ratio": 0.0007551547605544329, + "epoch": 0.059322033898305086, + "grad_norm": 0.0, + "learning_rate": 1.1864406779661018e-05, + "loss": 0.0, + "step": 70 + }, + { + "clip_ratio": 0.0004996137577109039, + "epoch": 0.06016949152542373, + "grad_norm": 0.0, + "learning_rate": 1.2033898305084745e-05, + "loss": 0.0, + "step": 71 + }, + { + "clip_ratio": 0.0007176484214141965, + "epoch": 0.061016949152542375, + "grad_norm": 0.0, + "learning_rate": 1.2203389830508477e-05, + "loss": 0.0, + "step": 72 + }, + { + "clip_ratio": 0.0004170738684479147, + "completion_length": 383.6964416503906, + "epoch": 0.06186440677966102, + "grad_norm": 0.01481240616851262, + "learning_rate": 1.2372881355932205e-05, + "loss": 0.0412, + "num_tokens": 592003.0, + "reward": -0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, 
+ "step": 73 + }, + { + "clip_ratio": 0.0008365331450477242, + "epoch": 0.06271186440677966, + "grad_norm": 0.01522897212214854, + "learning_rate": 1.2542372881355932e-05, + "loss": 0.0411, + "step": 74 + }, + { + "clip_ratio": 0.000981268472969532, + "epoch": 0.0635593220338983, + "grad_norm": 0.014948882448171377, + "learning_rate": 1.2711864406779661e-05, + "loss": 0.0411, + "step": 75 + }, + { + "clip_ratio": 0.0006704007391817868, + "epoch": 0.06440677966101695, + "grad_norm": 0.015045917131498382, + "learning_rate": 1.288135593220339e-05, + "loss": 0.041, + "step": 76 + }, + { + "clip_ratio": 0.00022424904454965144, + "completion_length": 437.9821472167969, + "epoch": 0.06525423728813559, + "grad_norm": 0.030968041587588573, + "learning_rate": 1.305084745762712e-05, + "loss": 0.0453, + "num_tokens": 623050.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 77 + }, + { + "clip_ratio": 0.00053448136895895, + "epoch": 0.06610169491525424, + "grad_norm": 0.02976001587219013, + "learning_rate": 1.3220338983050848e-05, + "loss": 0.0453, + "step": 78 + }, + { + "clip_ratio": 0.0010130176087841392, + "epoch": 0.06694915254237288, + "grad_norm": 0.02743385432574901, + "learning_rate": 1.3389830508474577e-05, + "loss": 0.045, + "step": 79 + }, + { + "clip_ratio": 0.0011749044060707092, + "epoch": 0.06779661016949153, + "grad_norm": 0.025462048937107604, + "learning_rate": 1.3559322033898305e-05, + "loss": 0.045, + "step": 80 + }, + { + "clip_ratio": 0.001996266655623913, + "completion_length": 382.2321472167969, + "epoch": 0.06864406779661017, + "grad_norm": 0.13457631329414246, + "learning_rate": 1.3728813559322034e-05, + 
"loss": 0.0135, + "num_tokens": 651839.0, + "reward": -0.6785714626312256, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": -0.6785714030265808, + "rewards/check_winston_local_func/std": 0.7411819696426392, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 81 + }, + { + "clip_ratio": 0.003203267464414239, + "epoch": 0.06949152542372881, + "grad_norm": 0.11807541511453928, + "learning_rate": 1.3898305084745764e-05, + "loss": 0.0128, + "step": 82 + }, + { + "clip_ratio": 0.011069249361753464, + "epoch": 0.07033898305084746, + "grad_norm": 0.0768781703261771, + "learning_rate": 1.4067796610169493e-05, + "loss": 0.0118, + "step": 83 + }, + { + "clip_ratio": 0.013229678384959698, + "epoch": 0.0711864406779661, + "grad_norm": 0.07925229229917279, + "learning_rate": 1.4237288135593221e-05, + "loss": 0.011, + "step": 84 + }, + { + "clip_ratio": 0.0002107896434608847, + "completion_length": 397.1964416503906, + "epoch": 0.07203389830508475, + "grad_norm": 0.0461083173277337, + "learning_rate": 1.440677966101695e-05, + "loss": 0.0389, + "num_tokens": 681218.0, + "reward": -0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 85 + }, + { + "clip_ratio": 0.0010596371721476316, + "epoch": 0.07288135593220339, + "grad_norm": 0.04449467794694347, + "learning_rate": 1.4576271186440678e-05, + "loss": 0.0384, + "step": 86 + }, + { + "clip_ratio": 0.002870997181162238, + "epoch": 0.07372881355932204, + "grad_norm": 0.038978879976910054, + "learning_rate": 1.4745762711864408e-05, + 
"loss": 0.038, + "step": 87 + }, + { + "clip_ratio": 0.006624125875532627, + "epoch": 0.07457627118644068, + "grad_norm": 0.0364842012372814, + "learning_rate": 1.4915254237288137e-05, + "loss": 0.0377, + "step": 88 + }, + { + "clip_ratio": 0.00043057286529801786, + "completion_length": 399.64288330078125, + "epoch": 0.07542372881355933, + "grad_norm": 0.014090924578944663, + "learning_rate": 1.5084745762711865e-05, + "loss": 0.0328, + "num_tokens": 711078.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 89 + }, + { + "clip_ratio": 0.0018296982161700726, + "epoch": 0.07627118644067797, + "grad_norm": 0.014531205963070252, + "learning_rate": 1.5254237288135594e-05, + "loss": 0.0328, + "step": 90 + }, + { + "clip_ratio": 0.004530549980700016, + "epoch": 0.07711864406779662, + "grad_norm": 0.014754831265979268, + "learning_rate": 1.5423728813559326e-05, + "loss": 0.0327, + "step": 91 + }, + { + "clip_ratio": 0.008132151328027248, + "epoch": 0.07796610169491526, + "grad_norm": 0.014608619166449479, + "learning_rate": 1.5593220338983053e-05, + "loss": 0.0326, + "step": 92 + }, + { + "clip_ratio": 0.0007373582920990884, + "completion_length": 467.71429443359375, + "epoch": 0.0788135593220339, + "grad_norm": 0.041297580984419976, + "learning_rate": 1.576271186440678e-05, + "loss": 0.0616, + "num_tokens": 745862.0, + "reward": -0.8928571939468384, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 93 + }, + { + "clip_ratio": 0.001287531922571361, + "epoch": 0.07966101694915254, + "grad_norm": 0.030858648065290283, + "learning_rate": 1.593220338983051e-05, + "loss": 0.0614, + "step": 94 + }, + { + "clip_ratio": 0.0023924303241074085, + "epoch": 0.08050847457627118, + "grad_norm": 0.03463914321182917, + "learning_rate": 1.6101694915254237e-05, + "loss": 0.0613, + "step": 95 + }, + { + "clip_ratio": 0.00350037869066, + "epoch": 0.08135593220338982, + "grad_norm": 0.02665011286164521, + "learning_rate": 1.6271186440677967e-05, + "loss": 0.0611, + "step": 96 + }, + { + "clip_ratio": 0.0006918495637364686, + "completion_length": 320.75, + "epoch": 0.08220338983050847, + "grad_norm": 0.06373891470490567, + "learning_rate": 1.6440677966101697e-05, + "loss": -0.015, + "num_tokens": 771576.0, + "reward": -0.7500000596046448, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 97 + }, + { + "clip_ratio": 0.0029753418639302254, + "epoch": 0.08305084745762711, + "grad_norm": 0.05523249333421511, + "learning_rate": 1.6610169491525424e-05, + "loss": -0.0157, + "step": 98 + }, + { + "clip_ratio": 0.00716389948502183, + "epoch": 0.08389830508474576, + "grad_norm": 0.04924083222576615, + "learning_rate": 1.6779661016949154e-05, + "loss": -0.0158, + "step": 99 + }, + { + "clip_ratio": 0.011036296375095844, + "epoch": 0.0847457627118644, + "grad_norm": 0.04955323333773024, + "learning_rate": 1.694915254237288e-05, + "loss": -0.0163, + "step": 100 + }, + { + "clip_ratio": 0.00038607188616879284, + "completion_length": 507.2500305175781, + "epoch": 0.08559322033898305, + "grad_norm": 
0.0, + "learning_rate": 1.711864406779661e-05, + "loss": 0.0, + "num_tokens": 807230.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 101 + }, + { + "clip_ratio": 0.0004233713843859732, + "epoch": 0.08644067796610169, + "grad_norm": 0.0, + "learning_rate": 1.728813559322034e-05, + "loss": 0.0, + "step": 102 + }, + { + "clip_ratio": 0.0005304253427311778, + "epoch": 0.08728813559322034, + "grad_norm": 0.0, + "learning_rate": 1.745762711864407e-05, + "loss": 0.0, + "step": 103 + }, + { + "clip_ratio": 0.0008094432414509356, + "epoch": 0.08813559322033898, + "grad_norm": 0.0, + "learning_rate": 1.76271186440678e-05, + "loss": 0.0, + "step": 104 + }, + { + "clip_ratio": 0.0003136220038868487, + "completion_length": 309.4821472167969, + "epoch": 0.08898305084745763, + "grad_norm": 0.1215376293190595, + "learning_rate": 1.7796610169491526e-05, + "loss": 0.059, + "num_tokens": 830873.0, + "reward": -0.6071428656578064, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.6071428656578064, + "rewards/check_winston_local_func/std": 0.8017837405204773, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 105 + }, + { + "clip_ratio": 0.005680752452462912, + "epoch": 0.08983050847457627, + "grad_norm": 0.08882976004122672, + "learning_rate": 1.7966101694915256e-05, + "loss": 0.057, + "step": 106 + }, + { + "clip_ratio": 0.013865095563232899, + "epoch": 0.09067796610169492, + "grad_norm": 0.07178187465318808, + "learning_rate": 1.8135593220338986e-05, + "loss": 0.0551, + "step": 107 + }, + { + "clip_ratio": 
0.025337526574730873, + "epoch": 0.09152542372881356, + "grad_norm": 0.05889114052835241, + "learning_rate": 1.8305084745762713e-05, + "loss": 0.054, + "step": 108 + }, + { + "clip_ratio": 0.0004973930190317333, + "completion_length": 309.2857360839844, + "epoch": 0.0923728813559322, + "grad_norm": 0.10159993090017184, + "learning_rate": 1.8474576271186443e-05, + "loss": 0.1029, + "num_tokens": 856689.0, + "reward": -0.7500000596046448, + "reward_std": 0.4123912453651428, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 109 + }, + { + "clip_ratio": 0.005504293367266655, + "epoch": 0.09322033898305085, + "grad_norm": 0.09390182129772277, + "learning_rate": 1.864406779661017e-05, + "loss": 0.1017, + "step": 110 + }, + { + "clip_ratio": 0.022907190024852753, + "epoch": 0.0940677966101695, + "grad_norm": 0.08701453983072766, + "learning_rate": 1.88135593220339e-05, + "loss": 0.0999, + "step": 111 + }, + { + "clip_ratio": 0.04514092579483986, + "epoch": 0.09491525423728814, + "grad_norm": 0.08477253768734147, + "learning_rate": 1.898305084745763e-05, + "loss": 0.0987, + "step": 112 + }, + { + "clip_ratio": 0.0005664547788910568, + "completion_length": 434.39288330078125, + "epoch": 0.09576271186440678, + "grad_norm": 0.0, + "learning_rate": 1.9152542372881357e-05, + "loss": 0.0, + "num_tokens": 888255.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 113 + }, + { + "clip_ratio": 0.0015907255001366138, + "epoch": 
0.09661016949152543, + "grad_norm": 0.0, + "learning_rate": 1.9322033898305087e-05, + "loss": 0.0, + "step": 114 + }, + { + "clip_ratio": 0.003365863347426057, + "epoch": 0.09745762711864407, + "grad_norm": 0.0, + "learning_rate": 1.9491525423728814e-05, + "loss": 0.0, + "step": 115 + }, + { + "clip_ratio": 0.006915883626788855, + "epoch": 0.09830508474576272, + "grad_norm": 0.0, + "learning_rate": 1.9661016949152545e-05, + "loss": 0.0, + "step": 116 + }, + { + "clip_ratio": 0.0015928384382277727, + "completion_length": 311.08929443359375, + "epoch": 0.09915254237288136, + "grad_norm": 0.1669528890016949, + "learning_rate": 1.9830508474576275e-05, + "loss": 0.0592, + "num_tokens": 912948.0, + "reward": -0.785714328289032, + "reward_std": 0.28498074412345886, + "rewards/check_winston_local_func/mean": -0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241877675056458, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 117 + }, + { + "clip_ratio": 0.006070761010050774, + "epoch": 0.1, + "grad_norm": 0.15701074253607375, + "learning_rate": 2e-05, + "loss": 0.056, + "step": 118 + }, + { + "clip_ratio": 0.03282368928194046, + "epoch": 0.10084745762711865, + "grad_norm": 0.21942626154682726, + "learning_rate": 2.016949152542373e-05, + "loss": 0.0526, + "step": 119 + }, + { + "clip_ratio": 0.0628986731171608, + "epoch": 0.1016949152542373, + "grad_norm": 0.1568339023062343, + "learning_rate": 2.033898305084746e-05, + "loss": 0.0497, + "step": 120 + }, + { + "clip_ratio": 0.0003240547957830131, + "completion_length": 490.607177734375, + "epoch": 0.10254237288135593, + "grad_norm": 0.0, + "learning_rate": 2.0508474576271186e-05, + "loss": 0.0, + "num_tokens": 947318.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 121 + }, + { + "clip_ratio": 0.00037375700776465237, + "epoch": 0.10338983050847457, + "grad_norm": 0.0, + "learning_rate": 2.0677966101694916e-05, + "loss": 0.0, + "step": 122 + }, + { + "clip_ratio": 0.0011371899163350463, + "epoch": 0.10423728813559321, + "grad_norm": 0.0, + "learning_rate": 2.084745762711865e-05, + "loss": 0.0, + "step": 123 + }, + { + "clip_ratio": 0.0022452734410762787, + "epoch": 0.10508474576271186, + "grad_norm": 0.0, + "learning_rate": 2.1016949152542376e-05, + "loss": 0.0, + "step": 124 + }, + { + "clip_ratio": 0.004924725275486708, + "completion_length": 324.58929443359375, + "epoch": 0.1059322033898305, + "grad_norm": 0.3997089536055672, + "learning_rate": 2.1186440677966103e-05, + "loss": 0.04, + "num_tokens": 972527.0, + "reward": -0.8214285969734192, + "reward_std": 0.36553531885147095, + "rewards/check_winston_local_func/mean": -0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 125 + }, + { + "clip_ratio": 0.036066196858882904, + "epoch": 0.10677966101694915, + "grad_norm": 0.4003737832223874, + "learning_rate": 2.1355932203389833e-05, + "loss": 0.0371, + "step": 126 + }, + { + "clip_ratio": 0.06804865598678589, + "epoch": 0.10762711864406779, + "grad_norm": 0.3262616499772286, + "learning_rate": 2.152542372881356e-05, + "loss": 0.0328, + "step": 127 + }, + { + "clip_ratio": 0.08261267095804214, + "epoch": 0.10847457627118644, + "grad_norm": 0.19475445080797668, + "learning_rate": 2.169491525423729e-05, + "loss": 0.0284, + "step": 128 + }, + { + "clip_ratio": 0.00042747953557409346, + "completion_length": 
441.6785888671875, + "epoch": 0.10932203389830508, + "grad_norm": 0.07121815374577634, + "learning_rate": 2.1864406779661017e-05, + "loss": 0.0215, + "num_tokens": 1005157.0, + "reward": -0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 129 + }, + { + "clip_ratio": 0.0005996564286760986, + "epoch": 0.11016949152542373, + "grad_norm": 0.07374447574020743, + "learning_rate": 2.2033898305084748e-05, + "loss": 0.021, + "step": 130 + }, + { + "clip_ratio": 0.0070611475966870785, + "epoch": 0.11101694915254237, + "grad_norm": 0.0484843694410488, + "learning_rate": 2.2203389830508474e-05, + "loss": 0.02, + "step": 131 + }, + { + "clip_ratio": 0.02419929951429367, + "epoch": 0.11186440677966102, + "grad_norm": 0.03734227928764934, + "learning_rate": 2.2372881355932205e-05, + "loss": 0.0194, + "step": 132 + }, + { + "clip_ratio": 0.0008097242680378258, + "completion_length": 299.9285888671875, + "epoch": 0.11271186440677966, + "grad_norm": 0.2037296860020652, + "learning_rate": 2.2542372881355935e-05, + "loss": 0.0123, + "num_tokens": 1029577.0, + "reward": -0.4285714626312256, + "reward_std": 0.49777287244796753, + "rewards/check_winston_local_func/mean": -0.4285714328289032, + "rewards/check_winston_local_func/std": 0.9116845726966858, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 133 + }, + { + "clip_ratio": 0.010970565490424633, + "epoch": 0.1135593220338983, + "grad_norm": 0.15172018259613887, + "learning_rate": 2.2711864406779665e-05, + "loss": 0.0095, + "step": 134 + }, + { + "clip_ratio": 
0.027290966361761093, + "epoch": 0.11440677966101695, + "grad_norm": 0.14632003828933562, + "learning_rate": 2.2881355932203392e-05, + "loss": 0.0066, + "step": 135 + }, + { + "clip_ratio": 0.04884405434131622, + "epoch": 0.1152542372881356, + "grad_norm": 0.13010992493757564, + "learning_rate": 2.3050847457627122e-05, + "loss": 0.0037, + "step": 136 + }, + { + "clip_ratio": 0.00016204381245188415, + "completion_length": 397.9821472167969, + "epoch": 0.11610169491525424, + "grad_norm": 0.0819715923540025, + "learning_rate": 2.322033898305085e-05, + "loss": 0.0348, + "num_tokens": 1059368.0, + "reward": -0.7500000596046448, + "reward_std": 0.3499017357826233, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 137 + }, + { + "clip_ratio": 0.0012223825324326754, + "epoch": 0.11694915254237288, + "grad_norm": 0.07970324522981491, + "learning_rate": 2.338983050847458e-05, + "loss": 0.0336, + "step": 138 + }, + { + "clip_ratio": 0.015393489971756935, + "epoch": 0.11779661016949153, + "grad_norm": 0.07570693688371119, + "learning_rate": 2.3559322033898306e-05, + "loss": 0.0321, + "step": 139 + }, + { + "clip_ratio": 0.07253921031951904, + "epoch": 0.11864406779661017, + "grad_norm": 0.05800544884381334, + "learning_rate": 2.3728813559322036e-05, + "loss": 0.0305, + "step": 140 + }, + { + "clip_ratio": 0.00020609110652003437, + "completion_length": 376.3035888671875, + "epoch": 0.11949152542372882, + "grad_norm": 0.16488571125022886, + "learning_rate": 2.3898305084745763e-05, + "loss": -0.0156, + "num_tokens": 1088561.0, + "reward": -0.5, + "reward_std": 0.686587929725647, + "rewards/check_winston_local_func/mean": -0.5, + "rewards/check_winston_local_func/std": 0.8738628625869751, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 141 + }, + { + "clip_ratio": 0.020974619314074516, + "epoch": 0.12033898305084746, + "grad_norm": 0.12033253885509411, + "learning_rate": 2.406779661016949e-05, + "loss": -0.02, + "step": 142 + }, + { + "clip_ratio": 0.14757588505744934, + "epoch": 0.1211864406779661, + "grad_norm": 0.18906094003962706, + "learning_rate": 2.4237288135593224e-05, + "loss": -0.0215, + "step": 143 + }, + { + "clip_ratio": 0.18001240491867065, + "epoch": 0.12203389830508475, + "grad_norm": 0.2094330456679022, + "learning_rate": 2.4406779661016954e-05, + "loss": -0.0238, + "step": 144 + }, + { + "clip_ratio": 0.0010827317601069808, + "completion_length": 216.85714721679688, + "epoch": 0.1228813559322034, + "grad_norm": 0.22593574409537565, + "learning_rate": 2.457627118644068e-05, + "loss": -0.057, + "num_tokens": 1107713.0, + "reward": -0.0357142873108387, + "reward_std": 0.808063805103302, + "rewards/check_winston_local_func/mean": -0.0357142873108387, + "rewards/check_winston_local_func/std": 1.0084062814712524, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 145 + }, + { + "clip_ratio": 0.01685175858438015, + "epoch": 0.12372881355932204, + "grad_norm": 0.21920453847219976, + "learning_rate": 2.474576271186441e-05, + "loss": -0.0622, + "step": 146 + }, + { + "clip_ratio": 0.05698274075984955, + "epoch": 0.12457627118644068, + "grad_norm": 0.23790061749019706, + "learning_rate": 2.4915254237288138e-05, + "loss": -0.0672, + "step": 147 + }, + { + "clip_ratio": 0.06983836740255356, + "epoch": 0.12542372881355932, + "grad_norm": 0.19359662720887325, + "learning_rate": 2.5084745762711865e-05, + "loss": -0.0724, + "step": 148 + }, + { + 
"clip_ratio": 0.0013232758501544595, + "completion_length": 251.96429443359375, + "epoch": 0.12627118644067797, + "grad_norm": 0.27961740628458276, + "learning_rate": 2.5254237288135595e-05, + "loss": 0.06, + "num_tokens": 1129487.0, + "reward": -0.0357142873108387, + "reward_std": 0.9462584257125854, + "rewards/check_winston_local_func/mean": -0.0357142873108387, + "rewards/check_winston_local_func/std": 1.0084062814712524, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 149 + }, + { + "clip_ratio": 0.03364234417676926, + "epoch": 0.1271186440677966, + "grad_norm": 0.19276991014072303, + "learning_rate": 2.5423728813559322e-05, + "loss": 0.054, + "step": 150 + }, + { + "clip_ratio": 0.1430949568748474, + "epoch": 0.12796610169491526, + "grad_norm": 0.2768368269508983, + "learning_rate": 2.5593220338983052e-05, + "loss": 0.0518, + "step": 151 + }, + { + "clip_ratio": 0.16415317356586456, + "epoch": 0.1288135593220339, + "grad_norm": 0.25743304440606246, + "learning_rate": 2.576271186440678e-05, + "loss": 0.0475, + "step": 152 + }, + { + "clip_ratio": 0.0013469145633280277, + "completion_length": 204.48214721679688, + "epoch": 0.12966101694915255, + "grad_norm": 0.28188012404317475, + "learning_rate": 2.5932203389830512e-05, + "loss": 0.0527, + "num_tokens": 1148354.0, + "reward": 0.1428571492433548, + "reward_std": 0.7129831910133362, + "rewards/check_winston_local_func/mean": 0.1428571492433548, + "rewards/check_winston_local_func/std": 0.9987004995346069, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 153 + }, + { + "clip_ratio": 0.016695290803909302, + "epoch": 0.13050847457627118, + "grad_norm": 0.2641379759457116, + "learning_rate": 2.610169491525424e-05, + 
"loss": 0.0473, + "step": 154 + }, + { + "clip_ratio": 0.05237039551138878, + "epoch": 0.13135593220338984, + "grad_norm": 0.20691108630731772, + "learning_rate": 2.627118644067797e-05, + "loss": 0.0414, + "step": 155 + }, + { + "clip_ratio": 0.0867982804775238, + "epoch": 0.13220338983050847, + "grad_norm": 0.15341544674011254, + "learning_rate": 2.6440677966101696e-05, + "loss": 0.0351, + "step": 156 + }, + { + "clip_ratio": 0.0006545564392581582, + "completion_length": 233.9285888671875, + "epoch": 0.13305084745762713, + "grad_norm": 0.16036976523795443, + "learning_rate": 2.6610169491525427e-05, + "loss": 0.0179, + "num_tokens": 1168622.0, + "reward": 0.7142857313156128, + "reward_std": 0.4016071856021881, + "rewards/check_winston_local_func/mean": 0.7142857313156128, + "rewards/check_winston_local_func/std": 0.7061878442764282, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 157 + }, + { + "clip_ratio": 0.00993060227483511, + "epoch": 0.13389830508474576, + "grad_norm": 0.1298083776077636, + "learning_rate": 2.6779661016949153e-05, + "loss": 0.0151, + "step": 158 + }, + { + "clip_ratio": 0.0733163133263588, + "epoch": 0.13474576271186442, + "grad_norm": 0.11590218855503849, + "learning_rate": 2.6949152542372884e-05, + "loss": 0.0125, + "step": 159 + }, + { + "clip_ratio": 0.14935636520385742, + "epoch": 0.13559322033898305, + "grad_norm": 0.16154268567658825, + "learning_rate": 2.711864406779661e-05, + "loss": 0.011, + "step": 160 + }, + { + "clip_ratio": 0.0009650280699133873, + "completion_length": 174.7678680419922, + "epoch": 0.13644067796610168, + "grad_norm": 0.15404950919743313, + "learning_rate": 2.728813559322034e-05, + "loss": 0.0078, + "num_tokens": 1185697.0, + "reward": 0.8571429252624512, + "reward_std": 0.3342905640602112, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + 
"rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 161 + }, + { + "clip_ratio": 0.004791476763784885, + "epoch": 0.13728813559322034, + "grad_norm": 0.12682344230599282, + "learning_rate": 2.7457627118644068e-05, + "loss": 0.0056, + "step": 162 + }, + { + "clip_ratio": 0.023417560383677483, + "epoch": 0.13813559322033897, + "grad_norm": 0.0948693079603576, + "learning_rate": 2.76271186440678e-05, + "loss": 0.003, + "step": 163 + }, + { + "clip_ratio": 0.07911951839923859, + "epoch": 0.13898305084745763, + "grad_norm": 0.09089932231497586, + "learning_rate": 2.7796610169491528e-05, + "loss": 0.0007, + "step": 164 + }, + { + "clip_ratio": 0.000979878008365631, + "completion_length": 126.64286041259766, + "epoch": 0.13983050847457626, + "grad_norm": 0.1801163708005843, + "learning_rate": 2.7966101694915258e-05, + "loss": -0.0396, + "num_tokens": 1199565.0, + "reward": 0.7500000596046448, + "reward_std": 0.3859959840774536, + "rewards/check_winston_local_func/mean": 0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 165 + }, + { + "clip_ratio": 0.009913308545947075, + "epoch": 0.14067796610169492, + "grad_norm": 0.14588220837158195, + "learning_rate": 2.8135593220338985e-05, + "loss": -0.0428, + "step": 166 + }, + { + "clip_ratio": 0.07110879570245743, + "epoch": 0.14152542372881355, + "grad_norm": 0.276973278154756, + "learning_rate": 2.8305084745762715e-05, + "loss": -0.0441, + "step": 167 + }, + { + "clip_ratio": 0.06909574568271637, + "epoch": 0.1423728813559322, + "grad_norm": 0.12488402451050255, + "learning_rate": 2.8474576271186442e-05, + 
"loss": -0.0494, + "step": 168 + }, + { + "clip_ratio": 0.0003819709818344563, + "completion_length": 152.73214721679688, + "epoch": 0.14322033898305084, + "grad_norm": 0.3195642927880649, + "learning_rate": 2.8644067796610172e-05, + "loss": 0.0302, + "num_tokens": 1214790.0, + "reward": 0.7142857313156128, + "reward_std": 0.3342905342578888, + "rewards/check_winston_local_func/mean": 0.7142857313156128, + "rewards/check_winston_local_func/std": 0.7061878442764282, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 169 + }, + { + "clip_ratio": 0.015475978143513203, + "epoch": 0.1440677966101695, + "grad_norm": 0.2360393211362849, + "learning_rate": 2.88135593220339e-05, + "loss": 0.0228, + "step": 170 + }, + { + "clip_ratio": 0.08493895828723907, + "epoch": 0.14491525423728813, + "grad_norm": 0.17350104363138513, + "learning_rate": 2.8983050847457626e-05, + "loss": 0.0163, + "step": 171 + }, + { + "clip_ratio": 0.14768318831920624, + "epoch": 0.14576271186440679, + "grad_norm": 0.19569281232532856, + "learning_rate": 2.9152542372881356e-05, + "loss": 0.013, + "step": 172 + }, + { + "clip_ratio": 0.006150017958134413, + "completion_length": 186.25001525878906, + "epoch": 0.14661016949152542, + "grad_norm": 0.06068449124289285, + "learning_rate": 2.932203389830509e-05, + "loss": -0.0169, + "num_tokens": 1232564.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 173 + }, + { + "clip_ratio": 0.014521388337016106, + "epoch": 0.14745762711864407, + "grad_norm": 0.05994459441740582, + 
"learning_rate": 2.9491525423728817e-05, + "loss": -0.0174, + "step": 174 + }, + { + "clip_ratio": 0.04354570060968399, + "epoch": 0.1483050847457627, + "grad_norm": 0.06278027199945566, + "learning_rate": 2.9661016949152547e-05, + "loss": -0.0183, + "step": 175 + }, + { + "clip_ratio": 0.10504651814699173, + "epoch": 0.14915254237288136, + "grad_norm": 0.04416483226500781, + "learning_rate": 2.9830508474576274e-05, + "loss": -0.0193, + "step": 176 + }, + { + "clip_ratio": 0.003162125591188669, + "completion_length": 163.17857360839844, + "epoch": 0.15, + "grad_norm": 0.11359153510598317, + "learning_rate": 3.0000000000000004e-05, + "loss": -0.0335, + "num_tokens": 1248550.0, + "reward": 0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 177 + }, + { + "clip_ratio": 0.01111722644418478, + "epoch": 0.15084745762711865, + "grad_norm": 0.10416258904679447, + "learning_rate": 3.016949152542373e-05, + "loss": -0.0347, + "step": 178 + }, + { + "clip_ratio": 0.04117439687252045, + "epoch": 0.15169491525423728, + "grad_norm": 0.08204255975558637, + "learning_rate": 3.0338983050847458e-05, + "loss": -0.0364, + "step": 179 + }, + { + "clip_ratio": 0.08657827973365784, + "epoch": 0.15254237288135594, + "grad_norm": 0.08178448057500348, + "learning_rate": 3.0508474576271188e-05, + "loss": -0.038, + "step": 180 + }, + { + "clip_ratio": 0.010199248790740967, + "completion_length": 162.35714721679688, + "epoch": 0.15338983050847457, + "grad_norm": 0.4819050859019718, + "learning_rate": 3.067796610169492e-05, + "loss": 0.06, + "num_tokens": 1264994.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + 
"rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 181 + }, + { + "clip_ratio": 0.060436759144067764, + "epoch": 0.15423728813559323, + "grad_norm": 0.1959898799735129, + "learning_rate": 3.084745762711865e-05, + "loss": 0.0533, + "step": 182 + }, + { + "clip_ratio": 0.13463598489761353, + "epoch": 0.15508474576271186, + "grad_norm": 0.12678282333898375, + "learning_rate": 3.101694915254238e-05, + "loss": 0.0482, + "step": 183 + }, + { + "clip_ratio": 0.19176946580410004, + "epoch": 0.15593220338983052, + "grad_norm": 0.10756609820315277, + "learning_rate": 3.1186440677966106e-05, + "loss": 0.0463, + "step": 184 + }, + { + "clip_ratio": 0.0008241009199991822, + "completion_length": 237.60714721679688, + "epoch": 0.15677966101694915, + "grad_norm": 0.0, + "learning_rate": 3.135593220338983e-05, + "loss": 0.0, + "num_tokens": 1286164.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 185 + }, + { + "clip_ratio": 0.002994579030200839, + "epoch": 0.1576271186440678, + "grad_norm": 0.0, + "learning_rate": 3.152542372881356e-05, + "loss": 0.0, + "step": 186 + }, + { + "clip_ratio": 0.00574068445712328, + "epoch": 0.15847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.169491525423729e-05, + "loss": 0.0, + "step": 187 + }, + { + "clip_ratio": 0.012791804037988186, + "epoch": 0.15932203389830507, + "grad_norm": 0.0, + "learning_rate": 3.186440677966102e-05, + "loss": 0.0, + "step": 188 + }, + { + "clip_ratio": 
0.006764067802578211, + "completion_length": 143.94644165039062, + "epoch": 0.16016949152542373, + "grad_norm": 0.04704135237627796, + "learning_rate": 3.203389830508475e-05, + "loss": -0.0095, + "num_tokens": 1301409.0, + "reward": 0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 189 + }, + { + "clip_ratio": 0.013139193877577782, + "epoch": 0.16101694915254236, + "grad_norm": 0.04532372769932697, + "learning_rate": 3.2203389830508473e-05, + "loss": -0.0098, + "step": 190 + }, + { + "clip_ratio": 0.03423069044947624, + "epoch": 0.16186440677966102, + "grad_norm": 0.040646403971755785, + "learning_rate": 3.237288135593221e-05, + "loss": -0.0105, + "step": 191 + }, + { + "clip_ratio": 0.06455554068088531, + "epoch": 0.16271186440677965, + "grad_norm": 0.03643001220928061, + "learning_rate": 3.2542372881355934e-05, + "loss": -0.0113, + "step": 192 + }, + { + "clip_ratio": 0.0007823093910701573, + "completion_length": 229.9107208251953, + "epoch": 0.1635593220338983, + "grad_norm": 0.0, + "learning_rate": 3.271186440677967e-05, + "loss": 0.0, + "num_tokens": 1321708.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 193 + }, + { + "clip_ratio": 0.0008988279732875526, + "epoch": 0.16440677966101694, + "grad_norm": 0.0, + "learning_rate": 3.2881355932203394e-05, + "loss": 0.0, + "step": 194 + }, + { + "clip_ratio": 0.003465626621618867, + "epoch": 
0.1652542372881356, + "grad_norm": 0.0, + "learning_rate": 3.305084745762712e-05, + "loss": 0.0, + "step": 195 + }, + { + "clip_ratio": 0.008655412122607231, + "epoch": 0.16610169491525423, + "grad_norm": 0.0, + "learning_rate": 3.322033898305085e-05, + "loss": 0.0, + "step": 196 + }, + { + "clip_ratio": 0.0021059864666312933, + "completion_length": 166.85714721679688, + "epoch": 0.1669491525423729, + "grad_norm": 0.17307734331449404, + "learning_rate": 3.338983050847458e-05, + "loss": -0.0036, + "num_tokens": 1338540.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 197 + }, + { + "clip_ratio": 0.009274979121983051, + "epoch": 0.16779661016949152, + "grad_norm": 0.10860266060182006, + "learning_rate": 3.355932203389831e-05, + "loss": -0.0066, + "step": 198 + }, + { + "clip_ratio": 0.03715561330318451, + "epoch": 0.16864406779661018, + "grad_norm": 0.09136703784102146, + "learning_rate": 3.3728813559322035e-05, + "loss": -0.008, + "step": 199 + }, + { + "clip_ratio": 0.06759678572416306, + "epoch": 0.1694915254237288, + "grad_norm": 0.08121070179066665, + "learning_rate": 3.389830508474576e-05, + "loss": -0.009, + "step": 200 + }, + { + "clip_ratio": 0.0002369106950936839, + "completion_length": 132.625, + "epoch": 0.17033898305084746, + "grad_norm": 0.12222790896016958, + "learning_rate": 3.406779661016949e-05, + "loss": 0.0931, + "num_tokens": 1352735.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + 
"rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 201 + }, + { + "clip_ratio": 0.0015077884308993816, + "epoch": 0.1711864406779661, + "grad_norm": 0.12028738542362348, + "learning_rate": 3.423728813559322e-05, + "loss": 0.0915, + "step": 202 + }, + { + "clip_ratio": 0.011990153230726719, + "epoch": 0.17203389830508475, + "grad_norm": 0.10639183565121645, + "learning_rate": 3.4406779661016956e-05, + "loss": 0.0873, + "step": 203 + }, + { + "clip_ratio": 0.05813857913017273, + "epoch": 0.17288135593220338, + "grad_norm": 0.08983262526351615, + "learning_rate": 3.457627118644068e-05, + "loss": 0.0833, + "step": 204 + }, + { + "clip_ratio": 0.001714512356556952, + "completion_length": 90.37500762939453, + "epoch": 0.17372881355932204, + "grad_norm": 0.10843637606790192, + "learning_rate": 3.474576271186441e-05, + "loss": 0.0164, + "num_tokens": 1365892.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 205 + }, + { + "clip_ratio": 0.017641481012105942, + "epoch": 0.17457627118644067, + "grad_norm": 0.08303991648667351, + "learning_rate": 3.491525423728814e-05, + "loss": 0.0148, + "step": 206 + }, + { + "clip_ratio": 0.15765391290187836, + "epoch": 0.17542372881355933, + "grad_norm": 0.07279655924549996, + "learning_rate": 3.5084745762711864e-05, + "loss": 0.0138, + "step": 207 + }, + { + "clip_ratio": 0.2804856598377228, + "epoch": 0.17627118644067796, + "grad_norm": 0.09315271598947107, + "learning_rate": 3.52542372881356e-05, + "loss": 0.0135, + "step": 208 + }, + { + "clip_ratio": 0.003159541869536042, + "completion_length": 
67.30357360839844, + "epoch": 0.17711864406779662, + "grad_norm": 0.0, + "learning_rate": 3.5423728813559324e-05, + "loss": 0.0, + "num_tokens": 1376973.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 209 + }, + { + "clip_ratio": 0.004362096078693867, + "epoch": 0.17796610169491525, + "grad_norm": 0.0, + "learning_rate": 3.559322033898305e-05, + "loss": 0.0, + "step": 210 + }, + { + "clip_ratio": 0.01770210638642311, + "epoch": 0.1788135593220339, + "grad_norm": 0.0, + "learning_rate": 3.576271186440678e-05, + "loss": 0.0, + "step": 211 + }, + { + "clip_ratio": 0.035751208662986755, + "epoch": 0.17966101694915254, + "grad_norm": 0.0, + "learning_rate": 3.593220338983051e-05, + "loss": 0.0, + "step": 212 + }, + { + "clip_ratio": 0.0035622839350253344, + "completion_length": 65.08928680419922, + "epoch": 0.1805084745762712, + "grad_norm": 0.0, + "learning_rate": 3.610169491525424e-05, + "loss": 0.0, + "num_tokens": 1387570.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 213 + }, + { + "clip_ratio": 0.0025523039512336254, + "epoch": 0.18135593220338983, + "grad_norm": 0.0, + "learning_rate": 3.627118644067797e-05, + "loss": 0.0, + "step": 214 + }, + { + "clip_ratio": 0.005835308227688074, + "epoch": 0.18220338983050846, + "grad_norm": 0.0, + "learning_rate": 3.64406779661017e-05, + "loss": 0.0, + "step": 215 + }, + { + "clip_ratio": 0.011904297396540642, + "epoch": 0.18305084745762712, + "grad_norm": 
0.0, + "learning_rate": 3.6610169491525426e-05, + "loss": 0.0, + "step": 216 + }, + { + "clip_ratio": 0.0003092146071139723, + "completion_length": 51.10714340209961, + "epoch": 0.18389830508474575, + "grad_norm": 0.356952256149441, + "learning_rate": 3.677966101694915e-05, + "loss": -0.0154, + "num_tokens": 1397320.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 217 + }, + { + "clip_ratio": 0.11051050573587418, + "epoch": 0.1847457627118644, + "grad_norm": 0.1542677635762948, + "learning_rate": 3.6949152542372886e-05, + "loss": -0.019, + "step": 218 + }, + { + "clip_ratio": 0.18382969498634338, + "epoch": 0.18559322033898304, + "grad_norm": 0.13966519767464722, + "learning_rate": 3.711864406779661e-05, + "loss": -0.0211, + "step": 219 + }, + { + "clip_ratio": 0.27009809017181396, + "epoch": 0.1864406779661017, + "grad_norm": 0.08132731257822706, + "learning_rate": 3.728813559322034e-05, + "loss": -0.0238, + "step": 220 + }, + { + "clip_ratio": 0.006028716918081045, + "completion_length": 83.26786041259766, + "epoch": 0.18728813559322033, + "grad_norm": 0.0, + "learning_rate": 3.745762711864407e-05, + "loss": 0.0, + "num_tokens": 1409935.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 221 + }, + { + "clip_ratio": 0.021660711616277695, + "epoch": 0.188135593220339, + "grad_norm": 0.0, + "learning_rate": 3.76271186440678e-05, + "loss": 0.0, 
+ "step": 222 + }, + { + "clip_ratio": 0.06699295341968536, + "epoch": 0.18898305084745762, + "grad_norm": 0.0, + "learning_rate": 3.779661016949153e-05, + "loss": 0.0, + "step": 223 + }, + { + "clip_ratio": 0.1347362995147705, + "epoch": 0.18983050847457628, + "grad_norm": 0.0, + "learning_rate": 3.796610169491526e-05, + "loss": 0.0, + "step": 224 + }, + { + "clip_ratio": 0.0006836191168986261, + "completion_length": 97.92857360839844, + "epoch": 0.1906779661016949, + "grad_norm": 0.05873233342660551, + "learning_rate": 3.813559322033899e-05, + "loss": 0.0388, + "num_tokens": 1422307.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 225 + }, + { + "clip_ratio": 0.0010315729305148125, + "epoch": 0.19152542372881357, + "grad_norm": 0.05937392738616397, + "learning_rate": 3.8305084745762714e-05, + "loss": 0.0386, + "step": 226 + }, + { + "clip_ratio": 0.01008252426981926, + "epoch": 0.1923728813559322, + "grad_norm": 0.0544038037479039, + "learning_rate": 3.847457627118644e-05, + "loss": 0.0371, + "step": 227 + }, + { + "clip_ratio": 0.0420512929558754, + "epoch": 0.19322033898305085, + "grad_norm": 0.047388133840752925, + "learning_rate": 3.8644067796610175e-05, + "loss": 0.0356, + "step": 228 + }, + { + "clip_ratio": 0.003829076187685132, + "completion_length": 48.48214340209961, + "epoch": 0.19406779661016949, + "grad_norm": 0.24338559301731436, + "learning_rate": 3.88135593220339e-05, + "loss": -0.0201, + "num_tokens": 1432182.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 229 + }, + { + "clip_ratio": 0.041657134890556335, + "epoch": 0.19491525423728814, + "grad_norm": 0.15635724094524717, + "learning_rate": 3.898305084745763e-05, + "loss": -0.026, + "step": 230 + }, + { + "clip_ratio": 0.16935327649116516, + "epoch": 0.19576271186440677, + "grad_norm": 0.11486942308015832, + "learning_rate": 3.9152542372881355e-05, + "loss": -0.0295, + "step": 231 + }, + { + "clip_ratio": 0.22958868741989136, + "epoch": 0.19661016949152543, + "grad_norm": 0.10892713241904037, + "learning_rate": 3.932203389830509e-05, + "loss": -0.0314, + "step": 232 + }, + { + "clip_ratio": 0.003605353645980358, + "completion_length": 66.10714721679688, + "epoch": 0.19745762711864406, + "grad_norm": 0.24973476558992524, + "learning_rate": 3.9491525423728816e-05, + "loss": -0.0118, + "num_tokens": 1443140.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 233 + }, + { + "clip_ratio": 0.029195427894592285, + "epoch": 0.19830508474576272, + "grad_norm": 0.1595699714332021, + "learning_rate": 3.966101694915255e-05, + "loss": -0.0189, + "step": 234 + }, + { + "clip_ratio": 0.10283487290143967, + "epoch": 0.19915254237288135, + "grad_norm": 0.11474727019285232, + "learning_rate": 3.9830508474576276e-05, + "loss": -0.0247, + "step": 235 + }, + { + "clip_ratio": 0.15862122178077698, + "epoch": 0.2, + "grad_norm": 0.10043744347803148, + "learning_rate": 4e-05, + "loss": -0.029, + "step": 236 + }, + { + "clip_ratio": 0.0013605443527922034, + 
"completion_length": 74.55357360839844, + "epoch": 0.20084745762711864, + "grad_norm": 0.13987954732136554, + "learning_rate": 3.9981167608286254e-05, + "loss": -0.0433, + "num_tokens": 1454515.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 237 + }, + { + "clip_ratio": 0.010904515162110329, + "epoch": 0.2016949152542373, + "grad_norm": 0.10644135758363778, + "learning_rate": 3.9962335216572505e-05, + "loss": -0.0464, + "step": 238 + }, + { + "clip_ratio": 0.05173995718359947, + "epoch": 0.20254237288135593, + "grad_norm": 0.08987160994189367, + "learning_rate": 3.994350282485876e-05, + "loss": -0.0494, + "step": 239 + }, + { + "clip_ratio": 0.10260221362113953, + "epoch": 0.2033898305084746, + "grad_norm": 0.07087528775663905, + "learning_rate": 3.9924670433145014e-05, + "loss": -0.0523, + "step": 240 + }, + { + "clip_ratio": 0.0006479613948613405, + "completion_length": 73.28572082519531, + "epoch": 0.20423728813559322, + "grad_norm": 0.3445626306759668, + "learning_rate": 3.9905838041431265e-05, + "loss": 0.0365, + "num_tokens": 1466203.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 241 + }, + { + "clip_ratio": 0.061018019914627075, + "epoch": 0.20508474576271185, + "grad_norm": 0.17586034661375718, + "learning_rate": 3.9887005649717516e-05, + "loss": 0.0285, + "step": 242 + 
}, + { + "clip_ratio": 0.21443673968315125, + "epoch": 0.2059322033898305, + "grad_norm": 0.16852265377216533, + "learning_rate": 3.986817325800377e-05, + "loss": 0.0245, + "step": 243 + }, + { + "clip_ratio": 0.3032749891281128, + "epoch": 0.20677966101694914, + "grad_norm": 0.16538030606379006, + "learning_rate": 3.984934086629002e-05, + "loss": 0.0219, + "step": 244 + }, + { + "clip_ratio": 0.001159251551143825, + "completion_length": 80.375, + "epoch": 0.2076271186440678, + "grad_norm": 0.17416776142167675, + "learning_rate": 3.9830508474576276e-05, + "loss": -0.0293, + "num_tokens": 1477712.0, + "reward": 0.8928571939468384, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 245 + }, + { + "clip_ratio": 0.018606197088956833, + "epoch": 0.20847457627118643, + "grad_norm": 0.13288705840160372, + "learning_rate": 3.981167608286253e-05, + "loss": -0.0342, + "step": 246 + }, + { + "clip_ratio": 0.07409250736236572, + "epoch": 0.2093220338983051, + "grad_norm": 0.11062194988477761, + "learning_rate": 3.979284369114878e-05, + "loss": -0.0387, + "step": 247 + }, + { + "clip_ratio": 0.13684464991092682, + "epoch": 0.21016949152542372, + "grad_norm": 0.09630587836377022, + "learning_rate": 3.9774011299435036e-05, + "loss": -0.0423, + "step": 248 + }, + { + "clip_ratio": 0.0009059179574251175, + "completion_length": 55.892860412597656, + "epoch": 0.21101694915254238, + "grad_norm": 0.30277389882138056, + "learning_rate": 3.975517890772128e-05, + "loss": -0.0232, + "num_tokens": 1489946.0, + "reward": 0.8214285969734192, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": 0.8214285969734192, + "rewards/check_winston_local_func/std": 
0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 249 + }, + { + "clip_ratio": 0.05273974686861038, + "epoch": 0.211864406779661, + "grad_norm": 0.20551737732816863, + "learning_rate": 3.973634651600754e-05, + "loss": -0.0352, + "step": 250 + }, + { + "clip_ratio": 0.15495876967906952, + "epoch": 0.21271186440677967, + "grad_norm": 0.23192855972985502, + "learning_rate": 3.971751412429379e-05, + "loss": -0.0428, + "step": 251 + }, + { + "clip_ratio": 0.17651182413101196, + "epoch": 0.2135593220338983, + "grad_norm": 0.153802982923592, + "learning_rate": 3.969868173258004e-05, + "loss": -0.0503, + "step": 252 + }, + { + "clip_ratio": 0.001365313190035522, + "completion_length": 74.5, + "epoch": 0.21440677966101696, + "grad_norm": 0.1294886128912843, + "learning_rate": 3.967984934086629e-05, + "loss": -0.0165, + "num_tokens": 1501150.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 253 + }, + { + "clip_ratio": 0.029109954833984375, + "epoch": 0.21525423728813559, + "grad_norm": 0.07796443960667383, + "learning_rate": 3.966101694915255e-05, + "loss": -0.0187, + "step": 254 + }, + { + "clip_ratio": 0.1373310536146164, + "epoch": 0.21610169491525424, + "grad_norm": 0.0719203190228422, + "learning_rate": 3.9642184557438794e-05, + "loss": -0.0211, + "step": 255 + }, + { + "clip_ratio": 0.24434244632720947, + "epoch": 0.21694915254237288, + "grad_norm": 0.0766668656235949, + "learning_rate": 3.962335216572505e-05, + "loss": -0.023, + "step": 256 + }, + { + 
"clip_ratio": 0.0014079277170822024, + "completion_length": 76.76786041259766, + "epoch": 0.21779661016949153, + "grad_norm": 0.14896557612902658, + "learning_rate": 3.96045197740113e-05, + "loss": -0.048, + "num_tokens": 1513457.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 257 + }, + { + "clip_ratio": 0.008702627383172512, + "epoch": 0.21864406779661016, + "grad_norm": 0.12790944068712845, + "learning_rate": 3.9585687382297554e-05, + "loss": -0.0515, + "step": 258 + }, + { + "clip_ratio": 0.05537901073694229, + "epoch": 0.21949152542372882, + "grad_norm": 0.0901106115692995, + "learning_rate": 3.956685499058381e-05, + "loss": -0.0559, + "step": 259 + }, + { + "clip_ratio": 0.12627661228179932, + "epoch": 0.22033898305084745, + "grad_norm": 0.08842019141814955, + "learning_rate": 3.954802259887006e-05, + "loss": -0.0589, + "step": 260 + }, + { + "clip_ratio": 0.001680672401562333, + "completion_length": 68.46428680419922, + "epoch": 0.2211864406779661, + "grad_norm": 0.45455295411779023, + "learning_rate": 3.9529190207156314e-05, + "loss": 0.0253, + "num_tokens": 1523675.0, + "reward": 0.785714328289032, + "reward_std": 0.35475122928619385, + "rewards/check_winston_local_func/mean": 0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241878271102905, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 261 + }, + { + "clip_ratio": 0.08457090705633163, + "epoch": 0.22203389830508474, + "grad_norm": 0.2750192097608069, + "learning_rate": 3.9510357815442565e-05, + 
"loss": 0.0122, + "step": 262 + }, + { + "clip_ratio": 0.14618617296218872, + "epoch": 0.2228813559322034, + "grad_norm": 0.21776056884947845, + "learning_rate": 3.9491525423728816e-05, + "loss": 0.0035, + "step": 263 + }, + { + "clip_ratio": 0.16797243058681488, + "epoch": 0.22372881355932203, + "grad_norm": 0.1552963639704198, + "learning_rate": 3.947269303201507e-05, + "loss": -0.0027, + "step": 264 + }, + { + "clip_ratio": 0.0041149333119392395, + "completion_length": 92.05357360839844, + "epoch": 0.2245762711864407, + "grad_norm": 0.0, + "learning_rate": 3.9453860640301325e-05, + "loss": 0.0, + "num_tokens": 1537926.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 265 + }, + { + "clip_ratio": 0.015582824125885963, + "epoch": 0.22542372881355932, + "grad_norm": 0.0, + "learning_rate": 3.943502824858757e-05, + "loss": 0.0, + "step": 266 + }, + { + "clip_ratio": 0.05549276992678642, + "epoch": 0.22627118644067798, + "grad_norm": 0.0, + "learning_rate": 3.941619585687383e-05, + "loss": 0.0, + "step": 267 + }, + { + "clip_ratio": 0.10348478704690933, + "epoch": 0.2271186440677966, + "grad_norm": 0.0, + "learning_rate": 3.939736346516008e-05, + "loss": 0.0, + "step": 268 + }, + { + "clip_ratio": 0.0024110758677124977, + "completion_length": 132.5357208251953, + "epoch": 0.22796610169491524, + "grad_norm": 0.09866009376925343, + "learning_rate": 3.937853107344633e-05, + "loss": -0.0252, + "num_tokens": 1552892.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + 
"rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 269 + }, + { + "clip_ratio": 0.009020349942147732, + "epoch": 0.2288135593220339, + "grad_norm": 0.08611769992817986, + "learning_rate": 3.935969868173259e-05, + "loss": -0.0267, + "step": 270 + }, + { + "clip_ratio": 0.03130246326327324, + "epoch": 0.22966101694915253, + "grad_norm": 0.08283957691220468, + "learning_rate": 3.934086629001884e-05, + "loss": -0.0288, + "step": 271 + }, + { + "clip_ratio": 0.06420135498046875, + "epoch": 0.2305084745762712, + "grad_norm": 0.07319871503015539, + "learning_rate": 3.932203389830509e-05, + "loss": -0.0314, + "step": 272 + }, + { + "clip_ratio": 0.0030047716572880745, + "completion_length": 85.9464340209961, + "epoch": 0.23135593220338982, + "grad_norm": 0.42479217252605955, + "learning_rate": 3.930320150659134e-05, + "loss": 0.0151, + "num_tokens": 1564737.0, + "reward": 0.8571429252624512, + "reward_std": 0.3342905342578888, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 273 + }, + { + "clip_ratio": 0.046567775309085846, + "epoch": 0.23220338983050848, + "grad_norm": 0.25875493543994865, + "learning_rate": 3.928436911487759e-05, + "loss": 0.0039, + "step": 274 + }, + { + "clip_ratio": 0.11683137714862823, + "epoch": 0.2330508474576271, + "grad_norm": 0.15568587648106266, + "learning_rate": 3.926553672316384e-05, + "loss": -0.0039, + "step": 275 + }, + { + "clip_ratio": 0.1598564237356186, + "epoch": 0.23389830508474577, + "grad_norm": 0.12141989924883649, + "learning_rate": 3.92467043314501e-05, + "loss": -0.0076, + "step": 276 + }, + { + "clip_ratio": 0.001374253653921187, + "completion_length": 
122.96429443359375, + "epoch": 0.2347457627118644, + "grad_norm": 0.18782615643703865, + "learning_rate": 3.922787193973635e-05, + "loss": -0.026, + "num_tokens": 1579727.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 277 + }, + { + "clip_ratio": 0.017132315784692764, + "epoch": 0.23559322033898306, + "grad_norm": 0.12401604739808049, + "learning_rate": 3.92090395480226e-05, + "loss": -0.031, + "step": 278 + }, + { + "clip_ratio": 0.05705662816762924, + "epoch": 0.2364406779661017, + "grad_norm": 0.0762353013620226, + "learning_rate": 3.919020715630885e-05, + "loss": -0.034, + "step": 279 + }, + { + "clip_ratio": 0.09824671596288681, + "epoch": 0.23728813559322035, + "grad_norm": 0.07414316824181627, + "learning_rate": 3.9171374764595104e-05, + "loss": -0.0361, + "step": 280 + }, + { + "clip_ratio": 0.006891847122460604, + "completion_length": 111.14286041259766, + "epoch": 0.23813559322033898, + "grad_norm": 0.44887370177976565, + "learning_rate": 3.9152542372881355e-05, + "loss": -0.0147, + "num_tokens": 1592855.0, + "reward": 0.7500000596046448, + "reward_std": 0.637336254119873, + "rewards/check_winston_local_func/mean": 0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 281 + }, + { + "clip_ratio": 0.0862836092710495, + "epoch": 0.23898305084745763, + "grad_norm": 0.2905997622704694, + "learning_rate": 3.913370998116761e-05, + "loss": -0.0255, + "step": 282 + }, + { + "clip_ratio": 0.13213881850242615, 
+ "epoch": 0.23983050847457626, + "grad_norm": 0.20046065755709935, + "learning_rate": 3.9114877589453864e-05, + "loss": -0.036, + "step": 283 + }, + { + "clip_ratio": 0.20514217019081116, + "epoch": 0.24067796610169492, + "grad_norm": 0.17822083347245274, + "learning_rate": 3.9096045197740115e-05, + "loss": -0.0417, + "step": 284 + }, + { + "clip_ratio": 0.0007382220355793834, + "completion_length": 90.14286041259766, + "epoch": 0.24152542372881355, + "grad_norm": 0.05844943977633127, + "learning_rate": 3.907721280602637e-05, + "loss": -0.0145, + "num_tokens": 1605287.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 285 + }, + { + "clip_ratio": 0.006259975954890251, + "epoch": 0.2423728813559322, + "grad_norm": 0.04883518588447698, + "learning_rate": 3.905838041431262e-05, + "loss": -0.015, + "step": 286 + }, + { + "clip_ratio": 0.023042459040880203, + "epoch": 0.24322033898305084, + "grad_norm": 0.04225419018938037, + "learning_rate": 3.9039548022598875e-05, + "loss": -0.0156, + "step": 287 + }, + { + "clip_ratio": 0.04386242851614952, + "epoch": 0.2440677966101695, + "grad_norm": 0.03866602121110847, + "learning_rate": 3.9020715630885127e-05, + "loss": -0.0163, + "step": 288 + }, + { + "clip_ratio": 0.0029555519577115774, + "completion_length": 144.98214721679688, + "epoch": 0.24491525423728813, + "grad_norm": 0.1262690850004453, + "learning_rate": 3.900188323917138e-05, + "loss": -0.0343, + "num_tokens": 1620742.0, + "reward": 0.8571429252624512, + "reward_std": 0.4040610194206238, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 289 + }, + { + "clip_ratio": 0.0132750254124403, + "epoch": 0.2457627118644068, + "grad_norm": 0.10191416605166659, + "learning_rate": 3.898305084745763e-05, + "loss": -0.0377, + "step": 290 + }, + { + "clip_ratio": 0.043000176548957825, + "epoch": 0.24661016949152542, + "grad_norm": 0.08414775760035112, + "learning_rate": 3.896421845574388e-05, + "loss": -0.0405, + "step": 291 + }, + { + "clip_ratio": 0.07677298784255981, + "epoch": 0.24745762711864408, + "grad_norm": 0.07673330413564883, + "learning_rate": 3.894538606403013e-05, + "loss": -0.0436, + "step": 292 + }, + { + "clip_ratio": 0.0007677033427171409, + "completion_length": 314.64288330078125, + "epoch": 0.2483050847457627, + "grad_norm": 0.05727067166697756, + "learning_rate": 3.892655367231639e-05, + "loss": 0.0061, + "num_tokens": 1645538.0, + "reward": 0.8571429252624512, + "reward_std": 0.15272071957588196, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 293 + }, + { + "clip_ratio": 0.000989561784081161, + "epoch": 0.24915254237288137, + "grad_norm": 0.05658381403070837, + "learning_rate": 3.890772128060264e-05, + "loss": 0.0055, + "step": 294 + }, + { + "clip_ratio": 0.002105026040226221, + "epoch": 0.25, + "grad_norm": 0.05358318750720369, + "learning_rate": 3.888888888888889e-05, + "loss": 0.0045, + "step": 295 + }, + { + "clip_ratio": 0.008737047202885151, + "epoch": 0.25084745762711863, + "grad_norm": 0.04703537375755522, + "learning_rate": 3.887005649717515e-05, + "loss": 0.003, + "step": 296 + }, + { + "clip_ratio": 
0.001839210744947195, + "completion_length": 134.08929443359375, + "epoch": 0.25169491525423726, + "grad_norm": 0.13649134617830305, + "learning_rate": 3.885122410546139e-05, + "loss": -0.0194, + "num_tokens": 1660599.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 297 + }, + { + "clip_ratio": 0.009821072220802307, + "epoch": 0.25254237288135595, + "grad_norm": 0.0691252012642643, + "learning_rate": 3.883239171374765e-05, + "loss": -0.0207, + "step": 298 + }, + { + "clip_ratio": 0.027830438688397408, + "epoch": 0.2533898305084746, + "grad_norm": 0.04974246695392892, + "learning_rate": 3.88135593220339e-05, + "loss": -0.0215, + "step": 299 + }, + { + "clip_ratio": 0.05817332863807678, + "epoch": 0.2542372881355932, + "grad_norm": 0.04524622630022836, + "learning_rate": 3.879472693032015e-05, + "loss": -0.022, + "step": 300 + }, + { + "clip_ratio": 0.001304431352764368, + "completion_length": 191.7678680419922, + "epoch": 0.25508474576271184, + "grad_norm": 0.07787541743964174, + "learning_rate": 3.8775894538606404e-05, + "loss": -0.0228, + "num_tokens": 1680114.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 301 + }, + { + "clip_ratio": 0.004079771228134632, + "epoch": 0.2559322033898305, + "grad_norm": 0.05792631765695611, + "learning_rate": 3.875706214689266e-05, + "loss": 
-0.0237, + "step": 302 + }, + { + "clip_ratio": 0.01483174879103899, + "epoch": 0.25677966101694916, + "grad_norm": 0.04458591657155666, + "learning_rate": 3.8738229755178906e-05, + "loss": -0.0243, + "step": 303 + }, + { + "clip_ratio": 0.03137718886137009, + "epoch": 0.2576271186440678, + "grad_norm": 0.03880399913720864, + "learning_rate": 3.8719397363465164e-05, + "loss": -0.0248, + "step": 304 + }, + { + "clip_ratio": 0.0008109563495963812, + "completion_length": 160.80357360839844, + "epoch": 0.2584745762711864, + "grad_norm": 0.20027349236527825, + "learning_rate": 3.8700564971751415e-05, + "loss": -0.0224, + "num_tokens": 1697895.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 305 + }, + { + "clip_ratio": 0.0038879578933119774, + "epoch": 0.2593220338983051, + "grad_norm": 0.07886223922966976, + "learning_rate": 3.8681732580037666e-05, + "loss": -0.0235, + "step": 306 + }, + { + "clip_ratio": 0.014749433845281601, + "epoch": 0.26016949152542374, + "grad_norm": 0.06786394961277091, + "learning_rate": 3.8662900188323924e-05, + "loss": -0.0247, + "step": 307 + }, + { + "clip_ratio": 0.03604491055011749, + "epoch": 0.26101694915254237, + "grad_norm": 0.041254536906425165, + "learning_rate": 3.8644067796610175e-05, + "loss": -0.0257, + "step": 308 + }, + { + "clip_ratio": 0.000806977681349963, + "completion_length": 233.62501525878906, + "epoch": 0.261864406779661, + "grad_norm": 0.08944516343119709, + "learning_rate": 3.8625235404896426e-05, + "loss": -0.0013, + "num_tokens": 1719706.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + 
"rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 309 + }, + { + "clip_ratio": 0.0043969168327748775, + "epoch": 0.2627118644067797, + "grad_norm": 0.08008566583785545, + "learning_rate": 3.860640301318268e-05, + "loss": -0.0023, + "step": 310 + }, + { + "clip_ratio": 0.016115259379148483, + "epoch": 0.2635593220338983, + "grad_norm": 0.06746261744912278, + "learning_rate": 3.858757062146893e-05, + "loss": -0.0044, + "step": 311 + }, + { + "clip_ratio": 0.029712393879890442, + "epoch": 0.26440677966101694, + "grad_norm": 0.05954201685230599, + "learning_rate": 3.856873822975518e-05, + "loss": -0.0066, + "step": 312 + }, + { + "clip_ratio": 0.0019188802689313889, + "completion_length": 118.87500762939453, + "epoch": 0.2652542372881356, + "grad_norm": 0.2459608147132861, + "learning_rate": 3.854990583804144e-05, + "loss": 0.0138, + "num_tokens": 1733891.0, + "reward": 0.6785714626312256, + "reward_std": 0.39675766229629517, + "rewards/check_winston_local_func/mean": 0.6785714030265808, + "rewards/check_winston_local_func/std": 0.7411819696426392, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 313 + }, + { + "clip_ratio": 0.011992106214165688, + "epoch": 0.26610169491525426, + "grad_norm": 0.1848848068319577, + "learning_rate": 3.853107344632769e-05, + "loss": 0.0079, + "step": 314 + }, + { + "clip_ratio": 0.05709777772426605, + "epoch": 0.2669491525423729, + "grad_norm": 0.146233205223168, + "learning_rate": 3.851224105461394e-05, + "loss": 0.0011, + "step": 315 + }, + { + "clip_ratio": 0.10807797312736511, + "epoch": 0.2677966101694915, + "grad_norm": 0.16941844805388373, + "learning_rate": 
3.849340866290019e-05, + "loss": -0.0039, + "step": 316 + }, + { + "clip_ratio": 0.0004728636995423585, + "completion_length": 202.80357360839844, + "epoch": 0.26864406779661015, + "grad_norm": 0.0, + "learning_rate": 3.847457627118644e-05, + "loss": 0.0, + "num_tokens": 1753696.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 317 + }, + { + "clip_ratio": 0.0017543105641379952, + "epoch": 0.26949152542372884, + "grad_norm": 0.0, + "learning_rate": 3.84557438794727e-05, + "loss": 0.0, + "step": 318 + }, + { + "clip_ratio": 0.0028700276743620634, + "epoch": 0.27033898305084747, + "grad_norm": 0.0, + "learning_rate": 3.843691148775895e-05, + "loss": 0.0, + "step": 319 + }, + { + "clip_ratio": 0.005041220691055059, + "epoch": 0.2711864406779661, + "grad_norm": 0.0, + "learning_rate": 3.84180790960452e-05, + "loss": 0.0, + "step": 320 + }, + { + "clip_ratio": 0.0006087662768550217, + "completion_length": 246.4285888671875, + "epoch": 0.27203389830508473, + "grad_norm": 0.02696783441349338, + "learning_rate": 3.839924670433145e-05, + "loss": -0.0045, + "num_tokens": 1776128.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 321 + }, + { + "clip_ratio": 0.0007834911812096834, + "epoch": 0.27288135593220336, + "grad_norm": 0.026934160082675403, + "learning_rate": 3.8380414312617703e-05, + "loss": -0.0046, + "step": 322 + }, + { + "clip_ratio": 
0.0016883478965610266, + "epoch": 0.27372881355932205, + "grad_norm": 0.026427549336641994, + "learning_rate": 3.8361581920903955e-05, + "loss": -0.0047, + "step": 323 + }, + { + "clip_ratio": 0.004345850553363562, + "epoch": 0.2745762711864407, + "grad_norm": 0.026125606288862162, + "learning_rate": 3.834274952919021e-05, + "loss": -0.0052, + "step": 324 + }, + { + "clip_ratio": 0.0007442247588187456, + "completion_length": 262.21429443359375, + "epoch": 0.2754237288135593, + "grad_norm": 0.0, + "learning_rate": 3.8323917137476463e-05, + "loss": 0.0, + "num_tokens": 1797244.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 325 + }, + { + "clip_ratio": 0.000711060652974993, + "epoch": 0.27627118644067794, + "grad_norm": 0.0, + "learning_rate": 3.8305084745762714e-05, + "loss": 0.0, + "step": 326 + }, + { + "clip_ratio": 0.0014260082971304655, + "epoch": 0.2771186440677966, + "grad_norm": 0.0, + "learning_rate": 3.828625235404897e-05, + "loss": 0.0, + "step": 327 + }, + { + "clip_ratio": 0.001994561171159148, + "epoch": 0.27796610169491526, + "grad_norm": 0.0, + "learning_rate": 3.826741996233522e-05, + "loss": 0.0, + "step": 328 + }, + { + "clip_ratio": 0.001084706513211131, + "completion_length": 172.08929443359375, + "epoch": 0.2788135593220339, + "grad_norm": 0.0876294357565793, + "learning_rate": 3.8248587570621474e-05, + "loss": 0.0001, + "num_tokens": 1813569.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 329 + }, + { + "clip_ratio": 0.002805770607665181, + "epoch": 0.2796610169491525, + "grad_norm": 0.08561321931994853, + "learning_rate": 3.8229755178907726e-05, + "loss": -0.001, + "step": 330 + }, + { + "clip_ratio": 0.015290237963199615, + "epoch": 0.2805084745762712, + "grad_norm": 0.05204164151508037, + "learning_rate": 3.8210922787193977e-05, + "loss": -0.0022, + "step": 331 + }, + { + "clip_ratio": 0.02808833308517933, + "epoch": 0.28135593220338984, + "grad_norm": 0.047916681538162136, + "learning_rate": 3.819209039548023e-05, + "loss": -0.0028, + "step": 332 + }, + { + "clip_ratio": 0.00172739801928401, + "completion_length": 148.0178680419922, + "epoch": 0.28220338983050847, + "grad_norm": 0.0, + "learning_rate": 3.8173258003766486e-05, + "loss": 0.0, + "num_tokens": 1829578.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 333 + }, + { + "clip_ratio": 0.006786983925849199, + "epoch": 0.2830508474576271, + "grad_norm": 0.0, + "learning_rate": 3.815442561205273e-05, + "loss": 0.0, + "step": 334 + }, + { + "clip_ratio": 0.018832042813301086, + "epoch": 0.2838983050847458, + "grad_norm": 0.0, + "learning_rate": 3.813559322033899e-05, + "loss": 0.0, + "step": 335 + }, + { + "clip_ratio": 0.03414842113852501, + "epoch": 0.2847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.811676082862524e-05, + "loss": 0.0, + "step": 336 + }, + { + "clip_ratio": 0.0009491202072240412, + "completion_length": 218.2678680419922, + "epoch": 0.28559322033898304, + "grad_norm": 0.0, + "learning_rate": 3.809792843691149e-05, + "loss": 0.0, + "num_tokens": 1849113.0, + "reward": 1.0, + 
"reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 337 + }, + { + "clip_ratio": 0.0021562932524830103, + "epoch": 0.2864406779661017, + "grad_norm": 0.0, + "learning_rate": 3.807909604519775e-05, + "loss": 0.0, + "step": 338 + }, + { + "clip_ratio": 0.0030712890438735485, + "epoch": 0.28728813559322036, + "grad_norm": 0.0, + "learning_rate": 3.8060263653484e-05, + "loss": 0.0, + "step": 339 + }, + { + "clip_ratio": 0.0054122223518788815, + "epoch": 0.288135593220339, + "grad_norm": 0.0, + "learning_rate": 3.804143126177025e-05, + "loss": 0.0, + "step": 340 + }, + { + "clip_ratio": 0.00150212156586349, + "completion_length": 244.96429443359375, + "epoch": 0.2889830508474576, + "grad_norm": 0.0, + "learning_rate": 3.80225988700565e-05, + "loss": 0.0, + "num_tokens": 1869743.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 341 + }, + { + "clip_ratio": 0.0009405449964106083, + "epoch": 0.28983050847457625, + "grad_norm": 0.0, + "learning_rate": 3.800376647834275e-05, + "loss": 0.0, + "step": 342 + }, + { + "clip_ratio": 0.0021599442698061466, + "epoch": 0.29067796610169494, + "grad_norm": 0.0, + "learning_rate": 3.7984934086629e-05, + "loss": 0.0, + "step": 343 + }, + { + "clip_ratio": 0.002221164293587208, + "epoch": 0.29152542372881357, + "grad_norm": 0.0, + "learning_rate": 3.796610169491526e-05, + "loss": 0.0, + "step": 344 + }, + { + "clip_ratio": 0.0013717131223529577, + "completion_length": 223.3035888671875, + "epoch": 
0.2923728813559322, + "grad_norm": 0.12633963981780688, + "learning_rate": 3.7947269303201505e-05, + "loss": 0.0043, + "num_tokens": 1889048.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 345 + }, + { + "clip_ratio": 0.004998536314815283, + "epoch": 0.29322033898305083, + "grad_norm": 0.09918328474458112, + "learning_rate": 3.792843691148776e-05, + "loss": 0.0014, + "step": 346 + }, + { + "clip_ratio": 0.013239889405667782, + "epoch": 0.2940677966101695, + "grad_norm": 0.07460521909248337, + "learning_rate": 3.7909604519774014e-05, + "loss": -0.0005, + "step": 347 + }, + { + "clip_ratio": 0.033433884382247925, + "epoch": 0.29491525423728815, + "grad_norm": 0.06058703312441606, + "learning_rate": 3.7890772128060265e-05, + "loss": -0.002, + "step": 348 + }, + { + "clip_ratio": 0.003388527315109968, + "completion_length": 246.87501525878906, + "epoch": 0.2957627118644068, + "grad_norm": 0.0, + "learning_rate": 3.787193973634652e-05, + "loss": 0.0, + "num_tokens": 1909737.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 349 + }, + { + "clip_ratio": 0.003924295771867037, + "epoch": 0.2966101694915254, + "grad_norm": 0.0, + "learning_rate": 3.7853107344632774e-05, + "loss": 0.0, + "step": 350 + }, + { + "clip_ratio": 0.004607200622558594, + "epoch": 0.29745762711864404, + "grad_norm": 0.0, + "learning_rate": 3.7834274952919025e-05, + "loss": 
0.0, + "step": 351 + }, + { + "clip_ratio": 0.007875598035752773, + "epoch": 0.2983050847457627, + "grad_norm": 0.0, + "learning_rate": 3.7815442561205276e-05, + "loss": 0.0, + "step": 352 + }, + { + "clip_ratio": 0.0009100620518438518, + "completion_length": 221.5178680419922, + "epoch": 0.29915254237288136, + "grad_norm": 0.0, + "learning_rate": 3.779661016949153e-05, + "loss": 0.0, + "num_tokens": 1930142.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 353 + }, + { + "clip_ratio": 0.002130310283973813, + "epoch": 0.3, + "grad_norm": 0.0, + "learning_rate": 3.777777777777778e-05, + "loss": 0.0, + "step": 354 + }, + { + "clip_ratio": 0.005219562910497189, + "epoch": 0.3008474576271186, + "grad_norm": 0.0, + "learning_rate": 3.7758945386064036e-05, + "loss": 0.0, + "step": 355 + }, + { + "clip_ratio": 0.007768368814140558, + "epoch": 0.3016949152542373, + "grad_norm": 0.0, + "learning_rate": 3.774011299435029e-05, + "loss": 0.0, + "step": 356 + }, + { + "clip_ratio": 0.002148033818230033, + "completion_length": 170.1607208251953, + "epoch": 0.30254237288135594, + "grad_norm": 0.15185128372630483, + "learning_rate": 3.772128060263654e-05, + "loss": -0.0055, + "num_tokens": 1947871.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 357 + }, + { + "clip_ratio": 0.0094491271302104, + "epoch": 0.30338983050847457, + "grad_norm": 
0.08593602543887907, + "learning_rate": 3.770244821092279e-05, + "loss": -0.0077, + "step": 358 + }, + { + "clip_ratio": 0.06222861260175705, + "epoch": 0.3042372881355932, + "grad_norm": 0.06218840086209775, + "learning_rate": 3.768361581920904e-05, + "loss": -0.0087, + "step": 359 + }, + { + "clip_ratio": 0.10356654226779938, + "epoch": 0.3050847457627119, + "grad_norm": 0.07005653665235588, + "learning_rate": 3.766478342749529e-05, + "loss": -0.0091, + "step": 360 + }, + { + "clip_ratio": 0.0011975433444604278, + "completion_length": 213.58929443359375, + "epoch": 0.3059322033898305, + "grad_norm": 0.0, + "learning_rate": 3.764595103578155e-05, + "loss": 0.0, + "num_tokens": 1967888.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 361 + }, + { + "clip_ratio": 0.0017720028990879655, + "epoch": 0.30677966101694915, + "grad_norm": 0.0, + "learning_rate": 3.76271186440678e-05, + "loss": 0.0, + "step": 362 + }, + { + "clip_ratio": 0.002977039897814393, + "epoch": 0.3076271186440678, + "grad_norm": 0.0, + "learning_rate": 3.760828625235405e-05, + "loss": 0.0, + "step": 363 + }, + { + "clip_ratio": 0.0051023694686591625, + "epoch": 0.30847457627118646, + "grad_norm": 0.0, + "learning_rate": 3.758945386064031e-05, + "loss": 0.0, + "step": 364 + }, + { + "clip_ratio": 0.00277900043874979, + "completion_length": 175.7857208251953, + "epoch": 0.3093220338983051, + "grad_norm": 0.0, + "learning_rate": 3.7570621468926554e-05, + "loss": 0.0, + "num_tokens": 1985452.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 365 + }, + { + "clip_ratio": 0.0037012884858995676, + "epoch": 0.3101694915254237, + "grad_norm": 0.0, + "learning_rate": 3.755178907721281e-05, + "loss": 0.0, + "step": 366 + }, + { + "clip_ratio": 0.003547186963260174, + "epoch": 0.31101694915254235, + "grad_norm": 0.0, + "learning_rate": 3.753295668549906e-05, + "loss": 0.0, + "step": 367 + }, + { + "clip_ratio": 0.005322239827364683, + "epoch": 0.31186440677966104, + "grad_norm": 0.0, + "learning_rate": 3.7514124293785313e-05, + "loss": 0.0, + "step": 368 + }, + { + "clip_ratio": 0.004034657031297684, + "completion_length": 110.10714721679688, + "epoch": 0.31271186440677967, + "grad_norm": 0.12635496286827033, + "learning_rate": 3.7495291902071565e-05, + "loss": -0.0029, + "num_tokens": 1998890.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 369 + }, + { + "clip_ratio": 0.009292816743254662, + "epoch": 0.3135593220338983, + "grad_norm": 0.09312227257169194, + "learning_rate": 3.7476459510357816e-05, + "loss": -0.0051, + "step": 370 + }, + { + "clip_ratio": 0.03155684098601341, + "epoch": 0.31440677966101693, + "grad_norm": 0.05768111194452057, + "learning_rate": 3.745762711864407e-05, + "loss": -0.0068, + "step": 371 + }, + { + "clip_ratio": 0.0636262521147728, + "epoch": 0.3152542372881356, + "grad_norm": 0.047107030238279814, + "learning_rate": 3.7438794726930325e-05, + "loss": -0.0077, + "step": 372 + }, + { + "clip_ratio": 0.001232151291333139, + "completion_length": 234.87501525878906, + "epoch": 0.31610169491525425, + "grad_norm": 0.0, + "learning_rate": 
3.7419962335216576e-05, + "loss": 0.0, + "num_tokens": 2018547.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 373 + }, + { + "clip_ratio": 0.000776052416767925, + "epoch": 0.3169491525423729, + "grad_norm": 0.0, + "learning_rate": 3.740112994350283e-05, + "loss": 0.0, + "step": 374 + }, + { + "clip_ratio": 0.0008471307810395956, + "epoch": 0.3177966101694915, + "grad_norm": 0.0, + "learning_rate": 3.7382297551789085e-05, + "loss": 0.0, + "step": 375 + }, + { + "clip_ratio": 0.0025251915212720633, + "epoch": 0.31864406779661014, + "grad_norm": 0.0, + "learning_rate": 3.736346516007533e-05, + "loss": 0.0, + "step": 376 + }, + { + "clip_ratio": 0.0007399375317618251, + "completion_length": 256.4821472167969, + "epoch": 0.31949152542372883, + "grad_norm": 0.0, + "learning_rate": 3.734463276836159e-05, + "loss": 0.0, + "num_tokens": 2041454.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 377 + }, + { + "clip_ratio": 0.0008243753691203892, + "epoch": 0.32033898305084746, + "grad_norm": 0.0, + "learning_rate": 3.732580037664784e-05, + "loss": 0.0, + "step": 378 + }, + { + "clip_ratio": 0.0007023674552328885, + "epoch": 0.3211864406779661, + "grad_norm": 0.0, + "learning_rate": 3.730696798493409e-05, + "loss": 0.0, + "step": 379 + }, + { + "clip_ratio": 0.0021256571635603905, + "epoch": 0.3220338983050847, + "grad_norm": 0.0, + "learning_rate": 3.728813559322034e-05, + "loss": 0.0, + "step": 380 + }, + { + 
"clip_ratio": 0.00031836971174925566, + "completion_length": 330.5357360839844, + "epoch": 0.3228813559322034, + "grad_norm": 0.0, + "learning_rate": 3.72693032015066e-05, + "loss": 0.0, + "num_tokens": 2067788.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 381 + }, + { + "clip_ratio": 0.0005261868936941028, + "epoch": 0.32372881355932204, + "grad_norm": 0.0, + "learning_rate": 3.725047080979284e-05, + "loss": 0.0, + "step": 382 + }, + { + "clip_ratio": 0.00022253258794080466, + "epoch": 0.32457627118644067, + "grad_norm": 0.0, + "learning_rate": 3.72316384180791e-05, + "loss": 0.0, + "step": 383 + }, + { + "clip_ratio": 0.0006931009120307863, + "epoch": 0.3254237288135593, + "grad_norm": 0.0, + "learning_rate": 3.721280602636535e-05, + "loss": 0.0, + "step": 384 + }, + { + "clip_ratio": 0.003750877920538187, + "completion_length": 131.32144165039062, + "epoch": 0.326271186440678, + "grad_norm": 0.13631999098251835, + "learning_rate": 3.71939736346516e-05, + "loss": -0.0398, + "num_tokens": 2082318.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 385 + }, + { + "clip_ratio": 0.008503442630171776, + "epoch": 0.3271186440677966, + "grad_norm": 0.10569445062232001, + "learning_rate": 3.717514124293786e-05, + "loss": -0.0422, + "step": 386 + }, + { + "clip_ratio": 0.023656172677874565, + "epoch": 0.32796610169491525, + "grad_norm": 
0.08440756273471868, + "learning_rate": 3.715630885122411e-05, + "loss": -0.0449, + "step": 387 + }, + { + "clip_ratio": 0.05569107085466385, + "epoch": 0.3288135593220339, + "grad_norm": 0.0732865060873415, + "learning_rate": 3.713747645951036e-05, + "loss": -0.0475, + "step": 388 + }, + { + "clip_ratio": 0.0007906121318228543, + "completion_length": 286.9821472167969, + "epoch": 0.32966101694915256, + "grad_norm": 0.12082254351176634, + "learning_rate": 3.711864406779661e-05, + "loss": 0.0063, + "num_tokens": 2106125.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 389 + }, + { + "clip_ratio": 0.0015471117803826928, + "epoch": 0.3305084745762712, + "grad_norm": 0.11784337456998906, + "learning_rate": 3.7099811676082864e-05, + "loss": 0.0037, + "step": 390 + }, + { + "clip_ratio": 0.012488815933465958, + "epoch": 0.3313559322033898, + "grad_norm": 0.0941445528587045, + "learning_rate": 3.7080979284369115e-05, + "loss": 0.0003, + "step": 391 + }, + { + "clip_ratio": 0.028695791959762573, + "epoch": 0.33220338983050846, + "grad_norm": 0.08292380918920757, + "learning_rate": 3.706214689265537e-05, + "loss": -0.0027, + "step": 392 + }, + { + "clip_ratio": 0.0003602007054723799, + "completion_length": 239.83929443359375, + "epoch": 0.33305084745762714, + "grad_norm": 0.03143069205221857, + "learning_rate": 3.704331450094162e-05, + "loss": -0.0148, + "num_tokens": 2126076.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + 
"rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 393 + }, + { + "clip_ratio": 0.0013097112532705069, + "epoch": 0.3338983050847458, + "grad_norm": 0.03129928506196762, + "learning_rate": 3.7024482109227875e-05, + "loss": -0.0151, + "step": 394 + }, + { + "clip_ratio": 0.0041871643625199795, + "epoch": 0.3347457627118644, + "grad_norm": 0.030726291213562903, + "learning_rate": 3.7005649717514126e-05, + "loss": -0.0154, + "step": 395 + }, + { + "clip_ratio": 0.009951738640666008, + "epoch": 0.33559322033898303, + "grad_norm": 0.028938580269396656, + "learning_rate": 3.698681732580038e-05, + "loss": -0.0158, + "step": 396 + }, + { + "clip_ratio": 0.0010198758682236075, + "completion_length": 190.6607208251953, + "epoch": 0.3364406779661017, + "grad_norm": 0.0, + "learning_rate": 3.6967984934086635e-05, + "loss": 0.0, + "num_tokens": 2143449.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 397 + }, + { + "clip_ratio": 0.0006895315600559115, + "epoch": 0.33728813559322035, + "grad_norm": 0.0, + "learning_rate": 3.6949152542372886e-05, + "loss": 0.0, + "step": 398 + }, + { + "clip_ratio": 0.0038485280238091946, + "epoch": 0.338135593220339, + "grad_norm": 0.0, + "learning_rate": 3.693032015065914e-05, + "loss": 0.0, + "step": 399 + }, + { + "clip_ratio": 0.00733610987663269, + "epoch": 0.3389830508474576, + "grad_norm": 0.0, + "learning_rate": 3.691148775894539e-05, + "loss": 0.0, + "step": 400 + } + ], + "logging_steps": 1, + "max_steps": 2360, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + 
"should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1261db78e0a310bb2e0bd6333e2741bd2c4391ea --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3bd8ba987ac3c91f8253f49fc4f0e162f8c1db67922f9a6a6a7ad4757383ff +size 7544 diff --git a/checkpoint-400/zero_to_fp32.py b/checkpoint-400/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/checkpoint-400/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-500/.ipynb_checkpoints/trainer_state-checkpoint.json b/checkpoint-500/.ipynb_checkpoints/trainer_state-checkpoint.json new file mode 100644 index 0000000000000000000000000000000000000000..ccae6dc708bcf590de4430cf85d28c100ed90e88 --- /dev/null +++ b/checkpoint-500/.ipynb_checkpoints/trainer_state-checkpoint.json @@ -0,0 +1,5284 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.423728813559322, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio": 0.0, + "completion_length": 396.3571472167969, + "epoch": 0.000847457627118644, + "grad_norm": 0.028597827622128653, + "learning_rate": 1.6949152542372883e-07, + "loss": 0.0096, + "num_tokens": 29860.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 1 + }, + { + "clip_ratio": 0.0, + "epoch": 0.001694915254237288, + "grad_norm": 0.0283861264343528, + "learning_rate": 3.3898305084745766e-07, + "loss": 0.0096, + "step": 2 + }, + { + "clip_ratio": 0.0005210353410802782, + "epoch": 
0.002542372881355932, + "grad_norm": 0.024416377206652233, + "learning_rate": 5.084745762711865e-07, + "loss": 0.0095, + "step": 3 + }, + { + "clip_ratio": 0.0003804714942816645, + "epoch": 0.003389830508474576, + "grad_norm": 0.024954590093213137, + "learning_rate": 6.779661016949153e-07, + "loss": 0.0096, + "step": 4 + }, + { + "clip_ratio": 0.00028131139697507024, + "completion_length": 477.6250305175781, + "epoch": 0.00423728813559322, + "grad_norm": 0.0, + "learning_rate": 8.474576271186441e-07, + "loss": 0.0, + "num_tokens": 64207.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 5 + }, + { + "clip_ratio": 0.00026464727125130594, + "epoch": 0.005084745762711864, + "grad_norm": 0.0, + "learning_rate": 1.016949152542373e-06, + "loss": 0.0, + "step": 6 + }, + { + "clip_ratio": 0.0003427764168009162, + "epoch": 0.005932203389830509, + "grad_norm": 0.0, + "learning_rate": 1.186440677966102e-06, + "loss": 0.0, + "step": 7 + }, + { + "clip_ratio": 0.0003427252813708037, + "epoch": 0.006779661016949152, + "grad_norm": 0.0, + "learning_rate": 1.3559322033898307e-06, + "loss": 0.0, + "step": 8 + }, + { + "clip_ratio": 0.0003535364812705666, + "completion_length": 503.14288330078125, + "epoch": 0.007627118644067797, + "grad_norm": 0.0, + "learning_rate": 1.5254237288135596e-06, + "loss": 0.0, + "num_tokens": 99207.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 9 + }, + { + "clip_ratio": 
0.00017467686848249286, + "epoch": 0.00847457627118644, + "grad_norm": 0.0, + "learning_rate": 1.6949152542372882e-06, + "loss": 0.0, + "step": 10 + }, + { + "clip_ratio": 0.0002140275464626029, + "epoch": 0.009322033898305085, + "grad_norm": 0.0, + "learning_rate": 1.8644067796610171e-06, + "loss": 0.0, + "step": 11 + }, + { + "clip_ratio": 0.00035844597732648253, + "epoch": 0.010169491525423728, + "grad_norm": 0.0, + "learning_rate": 2.033898305084746e-06, + "loss": 0.0, + "step": 12 + }, + { + "clip_ratio": 0.00035540881799533963, + "completion_length": 471.83929443359375, + "epoch": 0.011016949152542373, + "grad_norm": 0.0, + "learning_rate": 2.203389830508475e-06, + "loss": 0.0, + "num_tokens": 132582.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 13 + }, + { + "clip_ratio": 0.0002507771132513881, + "epoch": 0.011864406779661017, + "grad_norm": 0.0, + "learning_rate": 2.372881355932204e-06, + "loss": 0.0, + "step": 14 + }, + { + "clip_ratio": 0.0001079499488696456, + "epoch": 0.012711864406779662, + "grad_norm": 0.0, + "learning_rate": 2.5423728813559323e-06, + "loss": 0.0, + "step": 15 + }, + { + "clip_ratio": 0.00021258163906168193, + "epoch": 0.013559322033898305, + "grad_norm": 0.0, + "learning_rate": 2.7118644067796613e-06, + "loss": 0.0, + "step": 16 + }, + { + "clip_ratio": 0.000322989042615518, + "completion_length": 387.14288330078125, + "epoch": 0.01440677966101695, + "grad_norm": 0.016452011518392446, + "learning_rate": 2.8813559322033903e-06, + "loss": 0.0658, + "num_tokens": 161406.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 
0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 17 + }, + { + "clip_ratio": 0.00034964055521413684, + "epoch": 0.015254237288135594, + "grad_norm": 0.017719451531367687, + "learning_rate": 3.0508474576271192e-06, + "loss": 0.0657, + "step": 18 + }, + { + "clip_ratio": 0.0004103984101675451, + "epoch": 0.016101694915254237, + "grad_norm": 0.016469439956852048, + "learning_rate": 3.2203389830508473e-06, + "loss": 0.0657, + "step": 19 + }, + { + "clip_ratio": 0.0003408819029573351, + "epoch": 0.01694915254237288, + "grad_norm": 0.017326107824003897, + "learning_rate": 3.3898305084745763e-06, + "loss": 0.0657, + "step": 20 + }, + { + "clip_ratio": 0.00046000577276572585, + "completion_length": 481.732177734375, + "epoch": 0.017796610169491526, + "grad_norm": 0.0, + "learning_rate": 3.5593220338983053e-06, + "loss": 0.0, + "num_tokens": 195711.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 21 + }, + { + "clip_ratio": 0.00042848457815125585, + "epoch": 0.01864406779661017, + "grad_norm": 0.0, + "learning_rate": 3.7288135593220342e-06, + "loss": 0.0, + "step": 22 + }, + { + "clip_ratio": 0.0004297326668165624, + "epoch": 0.019491525423728815, + "grad_norm": 0.0, + "learning_rate": 3.898305084745763e-06, + "loss": 0.0, + "step": 23 + }, + { + "clip_ratio": 0.000281251355772838, + "epoch": 0.020338983050847456, + "grad_norm": 0.0, + "learning_rate": 4.067796610169492e-06, + "loss": 0.0, + "step": 24 + }, + { + "clip_ratio": 0.00017563004803378135, + "completion_length": 442.7500305175781, + "epoch": 0.0211864406779661, 
+ "grad_norm": 0.11157048303951664, + "learning_rate": 4.23728813559322e-06, + "loss": 0.0104, + "num_tokens": 227185.0, + "reward": -0.8214285969734192, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 25 + }, + { + "clip_ratio": 0.00010569583537289873, + "epoch": 0.022033898305084745, + "grad_norm": 0.12213723346474271, + "learning_rate": 4.40677966101695e-06, + "loss": 0.0104, + "step": 26 + }, + { + "clip_ratio": 0.0005364188691601157, + "epoch": 0.02288135593220339, + "grad_norm": 0.11319483991164629, + "learning_rate": 4.576271186440678e-06, + "loss": 0.0106, + "step": 27 + }, + { + "clip_ratio": 0.0010358322178944945, + "epoch": 0.023728813559322035, + "grad_norm": 0.10119136649790463, + "learning_rate": 4.745762711864408e-06, + "loss": 0.0101, + "step": 28 + }, + { + "clip_ratio": 0.0002854761842172593, + "completion_length": 420.51788330078125, + "epoch": 0.02457627118644068, + "grad_norm": 0.0, + "learning_rate": 4.915254237288136e-06, + "loss": 0.0, + "num_tokens": 257614.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 29 + }, + { + "clip_ratio": 0.00021371705224737525, + "epoch": 0.025423728813559324, + "grad_norm": 0.0, + "learning_rate": 5.084745762711865e-06, + "loss": 0.0, + "step": 30 + }, + { + "clip_ratio": 0.00016422003682237118, + "epoch": 0.026271186440677965, + "grad_norm": 0.0, + "learning_rate": 5.254237288135594e-06, + "loss": 0.0, + "step": 31 + }, 
+ { + "clip_ratio": 0.000256577244726941, + "epoch": 0.02711864406779661, + "grad_norm": 0.0, + "learning_rate": 5.423728813559323e-06, + "loss": 0.0, + "step": 32 + }, + { + "clip_ratio": 0.00045646229409612715, + "completion_length": 465.1250305175781, + "epoch": 0.027966101694915254, + "grad_norm": 0.017873238036622066, + "learning_rate": 5.593220338983051e-06, + "loss": 0.0246, + "num_tokens": 290581.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 33 + }, + { + "clip_ratio": 0.0006314113852567971, + "epoch": 0.0288135593220339, + "grad_norm": 0.01732638271233714, + "learning_rate": 5.7627118644067805e-06, + "loss": 0.0247, + "step": 34 + }, + { + "clip_ratio": 0.00045800459338352084, + "epoch": 0.029661016949152543, + "grad_norm": 0.017593288926627842, + "learning_rate": 5.932203389830509e-06, + "loss": 0.0247, + "step": 35 + }, + { + "clip_ratio": 0.0004213759966660291, + "epoch": 0.030508474576271188, + "grad_norm": 0.017758527483606314, + "learning_rate": 6.1016949152542385e-06, + "loss": 0.0247, + "step": 36 + }, + { + "clip_ratio": 0.00027920620050281286, + "completion_length": 487.982177734375, + "epoch": 0.03135593220338983, + "grad_norm": 0.017492673426871806, + "learning_rate": 6.271186440677966e-06, + "loss": 0.0287, + "num_tokens": 325036.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + 
"rewards/strict_format_reward_func/std": 0.0, + "step": 37 + }, + { + "clip_ratio": 0.0003654122701846063, + "epoch": 0.03220338983050847, + "grad_norm": 0.016942624524485753, + "learning_rate": 6.440677966101695e-06, + "loss": 0.0287, + "step": 38 + }, + { + "clip_ratio": 0.0002445173158776015, + "epoch": 0.03305084745762712, + "grad_norm": 0.017357366453315624, + "learning_rate": 6.610169491525424e-06, + "loss": 0.0287, + "step": 39 + }, + { + "clip_ratio": 0.00027939456049352884, + "epoch": 0.03389830508474576, + "grad_norm": 0.017497160548341977, + "learning_rate": 6.779661016949153e-06, + "loss": 0.0287, + "step": 40 + }, + { + "clip_ratio": 0.00030169120873324573, + "completion_length": 337.76788330078125, + "epoch": 0.03474576271186441, + "grad_norm": 0.013386997712677729, + "learning_rate": 6.949152542372882e-06, + "loss": 0.0194, + "num_tokens": 351879.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 41 + }, + { + "clip_ratio": 0.000481679366203025, + "epoch": 0.03559322033898305, + "grad_norm": 0.013534365829241167, + "learning_rate": 7.1186440677966106e-06, + "loss": 0.0194, + "step": 42 + }, + { + "clip_ratio": 0.0006071141688153148, + "epoch": 0.036440677966101696, + "grad_norm": 0.013688658779614732, + "learning_rate": 7.288135593220339e-06, + "loss": 0.0193, + "step": 43 + }, + { + "clip_ratio": 0.0005443710251711309, + "epoch": 0.03728813559322034, + "grad_norm": 0.013415623466192152, + "learning_rate": 7.4576271186440685e-06, + "loss": 0.0194, + "step": 44 + }, + { + "clip_ratio": 0.00027171947294846177, + "completion_length": 358.6964416503906, + "epoch": 0.038135593220338986, + "grad_norm": 0.0, + 
"learning_rate": 7.627118644067797e-06, + "loss": 0.0, + "num_tokens": 379414.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 45 + }, + { + "clip_ratio": 0.00027013494400307536, + "epoch": 0.03898305084745763, + "grad_norm": 0.0, + "learning_rate": 7.796610169491526e-06, + "loss": 0.0, + "step": 46 + }, + { + "clip_ratio": 0.00023684222833253443, + "epoch": 0.03983050847457627, + "grad_norm": 0.0, + "learning_rate": 7.966101694915255e-06, + "loss": 0.0, + "step": 47 + }, + { + "clip_ratio": 0.0004315820406191051, + "epoch": 0.04067796610169491, + "grad_norm": 0.0, + "learning_rate": 8.135593220338983e-06, + "loss": 0.0, + "step": 48 + }, + { + "clip_ratio": 0.00034640118246898055, + "completion_length": 392.46429443359375, + "epoch": 0.04152542372881356, + "grad_norm": 0.05155975490631469, + "learning_rate": 8.305084745762712e-06, + "loss": -0.023, + "num_tokens": 408424.0, + "reward": -0.8571429252624512, + "reward_std": 0.24888646602630615, + "rewards/check_winston_local_func/mean": -0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 49 + }, + { + "clip_ratio": 0.00034579477505758405, + "epoch": 0.0423728813559322, + "grad_norm": 0.051568368553185584, + "learning_rate": 8.47457627118644e-06, + "loss": -0.0233, + "step": 50 + }, + { + "clip_ratio": 0.0005872369511052966, + "epoch": 0.043220338983050846, + "grad_norm": 0.054569986775825835, + "learning_rate": 8.64406779661017e-06, + "loss": -0.0235, + "step": 51 + }, + { + "clip_ratio": 
0.00048618926666677, + "epoch": 0.04406779661016949, + "grad_norm": 0.05573624590215382, + "learning_rate": 8.8135593220339e-06, + "loss": -0.0236, + "step": 52 + }, + { + "clip_ratio": 0.000333156727720052, + "completion_length": 485.7500305175781, + "epoch": 0.044915254237288135, + "grad_norm": 0.0, + "learning_rate": 8.983050847457628e-06, + "loss": 0.0, + "num_tokens": 442986.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 53 + }, + { + "clip_ratio": 0.00042045177542604506, + "epoch": 0.04576271186440678, + "grad_norm": 0.0, + "learning_rate": 9.152542372881356e-06, + "loss": 0.0, + "step": 54 + }, + { + "clip_ratio": 0.00031678256345912814, + "epoch": 0.046610169491525424, + "grad_norm": 0.0, + "learning_rate": 9.322033898305085e-06, + "loss": 0.0, + "step": 55 + }, + { + "clip_ratio": 0.00010463170474395156, + "epoch": 0.04745762711864407, + "grad_norm": 0.0, + "learning_rate": 9.491525423728815e-06, + "loss": 0.0, + "step": 56 + }, + { + "clip_ratio": 0.0007074553286656737, + "completion_length": 428.3214416503906, + "epoch": 0.048305084745762714, + "grad_norm": 0.04153528214569023, + "learning_rate": 9.661016949152544e-06, + "loss": 0.0343, + "num_tokens": 473892.0, + "reward": -0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 57 + }, + { + "clip_ratio": 0.0004013319849036634, + "epoch": 0.04915254237288136, + "grad_norm": 0.04657277213309362, 
+ "learning_rate": 9.830508474576272e-06, + "loss": 0.0342, + "step": 58 + }, + { + "clip_ratio": 0.00044179416727274656, + "epoch": 0.05, + "grad_norm": 0.045153415468062494, + "learning_rate": 1e-05, + "loss": 0.0343, + "step": 59 + }, + { + "clip_ratio": 0.0007794442353770137, + "epoch": 0.05084745762711865, + "grad_norm": 0.035363902861678634, + "learning_rate": 1.016949152542373e-05, + "loss": 0.0339, + "step": 60 + }, + { + "clip_ratio": 0.00021712151647079736, + "completion_length": 299.8035888671875, + "epoch": 0.051694915254237285, + "grad_norm": 0.07205399219848665, + "learning_rate": 1.0338983050847458e-05, + "loss": 0.0477, + "num_tokens": 497465.0, + "reward": -0.785714328289032, + "reward_std": 0.28498074412345886, + "rewards/check_winston_local_func/mean": -0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241877675056458, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 61 + }, + { + "clip_ratio": 0.0002563712769187987, + "epoch": 0.05254237288135593, + "grad_norm": 0.07155354465871978, + "learning_rate": 1.0508474576271188e-05, + "loss": 0.0475, + "step": 62 + }, + { + "clip_ratio": 0.0001442718057660386, + "epoch": 0.053389830508474574, + "grad_norm": 0.07289445064494822, + "learning_rate": 1.0677966101694917e-05, + "loss": 0.0474, + "step": 63 + }, + { + "clip_ratio": 0.001116903149522841, + "epoch": 0.05423728813559322, + "grad_norm": 0.06596181254777028, + "learning_rate": 1.0847457627118645e-05, + "loss": 0.0468, + "step": 64 + }, + { + "clip_ratio": 0.00027901786961592734, + "completion_length": 480.4464416503906, + "epoch": 0.05508474576271186, + "grad_norm": 0.0, + "learning_rate": 1.1016949152542374e-05, + "loss": 0.0, + "num_tokens": 532266.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, 
+ "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 65 + }, + { + "clip_ratio": 0.00037270825123414397, + "epoch": 0.05593220338983051, + "grad_norm": 0.0, + "learning_rate": 1.1186440677966102e-05, + "loss": 0.0, + "step": 66 + }, + { + "clip_ratio": 0.0006563978386111557, + "epoch": 0.05677966101694915, + "grad_norm": 0.0, + "learning_rate": 1.1355932203389833e-05, + "loss": 0.0, + "step": 67 + }, + { + "clip_ratio": 0.0008186621707864106, + "epoch": 0.0576271186440678, + "grad_norm": 0.0, + "learning_rate": 1.1525423728813561e-05, + "loss": 0.0, + "step": 68 + }, + { + "clip_ratio": 0.0005370522267185152, + "completion_length": 420.3214416503906, + "epoch": 0.05847457627118644, + "grad_norm": 0.0, + "learning_rate": 1.169491525423729e-05, + "loss": 0.0, + "num_tokens": 563380.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 69 + }, + { + "clip_ratio": 0.0007551547605544329, + "epoch": 0.059322033898305086, + "grad_norm": 0.0, + "learning_rate": 1.1864406779661018e-05, + "loss": 0.0, + "step": 70 + }, + { + "clip_ratio": 0.0004996137577109039, + "epoch": 0.06016949152542373, + "grad_norm": 0.0, + "learning_rate": 1.2033898305084745e-05, + "loss": 0.0, + "step": 71 + }, + { + "clip_ratio": 0.0007176484214141965, + "epoch": 0.061016949152542375, + "grad_norm": 0.0, + "learning_rate": 1.2203389830508477e-05, + "loss": 0.0, + "step": 72 + }, + { + "clip_ratio": 0.0004170738684479147, + "completion_length": 383.6964416503906, + "epoch": 0.06186440677966102, + "grad_norm": 0.01481240616851262, + "learning_rate": 1.2372881355932205e-05, + 
"loss": 0.0412, + "num_tokens": 592003.0, + "reward": -0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 73 + }, + { + "clip_ratio": 0.0008365331450477242, + "epoch": 0.06271186440677966, + "grad_norm": 0.01522897212214854, + "learning_rate": 1.2542372881355932e-05, + "loss": 0.0411, + "step": 74 + }, + { + "clip_ratio": 0.000981268472969532, + "epoch": 0.0635593220338983, + "grad_norm": 0.014948882448171377, + "learning_rate": 1.2711864406779661e-05, + "loss": 0.0411, + "step": 75 + }, + { + "clip_ratio": 0.0006704007391817868, + "epoch": 0.06440677966101695, + "grad_norm": 0.015045917131498382, + "learning_rate": 1.288135593220339e-05, + "loss": 0.041, + "step": 76 + }, + { + "clip_ratio": 0.00022424904454965144, + "completion_length": 437.9821472167969, + "epoch": 0.06525423728813559, + "grad_norm": 0.030968041587588573, + "learning_rate": 1.305084745762712e-05, + "loss": 0.0453, + "num_tokens": 623050.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 77 + }, + { + "clip_ratio": 0.00053448136895895, + "epoch": 0.06610169491525424, + "grad_norm": 0.02976001587219013, + "learning_rate": 1.3220338983050848e-05, + "loss": 0.0453, + "step": 78 + }, + { + "clip_ratio": 0.0010130176087841392, + "epoch": 0.06694915254237288, + "grad_norm": 0.02743385432574901, + "learning_rate": 1.3389830508474577e-05, 
+ "loss": 0.045, + "step": 79 + }, + { + "clip_ratio": 0.0011749044060707092, + "epoch": 0.06779661016949153, + "grad_norm": 0.025462048937107604, + "learning_rate": 1.3559322033898305e-05, + "loss": 0.045, + "step": 80 + }, + { + "clip_ratio": 0.001996266655623913, + "completion_length": 382.2321472167969, + "epoch": 0.06864406779661017, + "grad_norm": 0.13457631329414246, + "learning_rate": 1.3728813559322034e-05, + "loss": 0.0135, + "num_tokens": 651839.0, + "reward": -0.6785714626312256, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": -0.6785714030265808, + "rewards/check_winston_local_func/std": 0.7411819696426392, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 81 + }, + { + "clip_ratio": 0.003203267464414239, + "epoch": 0.06949152542372881, + "grad_norm": 0.11807541511453928, + "learning_rate": 1.3898305084745764e-05, + "loss": 0.0128, + "step": 82 + }, + { + "clip_ratio": 0.011069249361753464, + "epoch": 0.07033898305084746, + "grad_norm": 0.0768781703261771, + "learning_rate": 1.4067796610169493e-05, + "loss": 0.0118, + "step": 83 + }, + { + "clip_ratio": 0.013229678384959698, + "epoch": 0.0711864406779661, + "grad_norm": 0.07925229229917279, + "learning_rate": 1.4237288135593221e-05, + "loss": 0.011, + "step": 84 + }, + { + "clip_ratio": 0.0002107896434608847, + "completion_length": 397.1964416503906, + "epoch": 0.07203389830508475, + "grad_norm": 0.0461083173277337, + "learning_rate": 1.440677966101695e-05, + "loss": 0.0389, + "num_tokens": 681218.0, + "reward": -0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 85 + }, + { + "clip_ratio": 0.0010596371721476316, + "epoch": 0.07288135593220339, + "grad_norm": 0.04449467794694347, + "learning_rate": 1.4576271186440678e-05, + "loss": 0.0384, + "step": 86 + }, + { + "clip_ratio": 0.002870997181162238, + "epoch": 0.07372881355932204, + "grad_norm": 0.038978879976910054, + "learning_rate": 1.4745762711864408e-05, + "loss": 0.038, + "step": 87 + }, + { + "clip_ratio": 0.006624125875532627, + "epoch": 0.07457627118644068, + "grad_norm": 0.0364842012372814, + "learning_rate": 1.4915254237288137e-05, + "loss": 0.0377, + "step": 88 + }, + { + "clip_ratio": 0.00043057286529801786, + "completion_length": 399.64288330078125, + "epoch": 0.07542372881355933, + "grad_norm": 0.014090924578944663, + "learning_rate": 1.5084745762711865e-05, + "loss": 0.0328, + "num_tokens": 711078.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 89 + }, + { + "clip_ratio": 0.0018296982161700726, + "epoch": 0.07627118644067797, + "grad_norm": 0.014531205963070252, + "learning_rate": 1.5254237288135594e-05, + "loss": 0.0328, + "step": 90 + }, + { + "clip_ratio": 0.004530549980700016, + "epoch": 0.07711864406779662, + "grad_norm": 0.014754831265979268, + "learning_rate": 1.5423728813559326e-05, + "loss": 0.0327, + "step": 91 + }, + { + "clip_ratio": 0.008132151328027248, + "epoch": 0.07796610169491526, + "grad_norm": 0.014608619166449479, + "learning_rate": 1.5593220338983053e-05, + "loss": 0.0326, + "step": 92 + }, + { + "clip_ratio": 0.0007373582920990884, + "completion_length": 467.71429443359375, + "epoch": 
0.0788135593220339, + "grad_norm": 0.041297580984419976, + "learning_rate": 1.576271186440678e-05, + "loss": 0.0616, + "num_tokens": 745862.0, + "reward": -0.8928571939468384, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 93 + }, + { + "clip_ratio": 0.001287531922571361, + "epoch": 0.07966101694915254, + "grad_norm": 0.030858648065290283, + "learning_rate": 1.593220338983051e-05, + "loss": 0.0614, + "step": 94 + }, + { + "clip_ratio": 0.0023924303241074085, + "epoch": 0.08050847457627118, + "grad_norm": 0.03463914321182917, + "learning_rate": 1.6101694915254237e-05, + "loss": 0.0613, + "step": 95 + }, + { + "clip_ratio": 0.00350037869066, + "epoch": 0.08135593220338982, + "grad_norm": 0.02665011286164521, + "learning_rate": 1.6271186440677967e-05, + "loss": 0.0611, + "step": 96 + }, + { + "clip_ratio": 0.0006918495637364686, + "completion_length": 320.75, + "epoch": 0.08220338983050847, + "grad_norm": 0.06373891470490567, + "learning_rate": 1.6440677966101697e-05, + "loss": -0.015, + "num_tokens": 771576.0, + "reward": -0.7500000596046448, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 97 + }, + { + "clip_ratio": 0.0029753418639302254, + "epoch": 0.08305084745762711, + "grad_norm": 0.05523249333421511, + "learning_rate": 1.6610169491525424e-05, + "loss": -0.0157, + "step": 98 + }, + { + "clip_ratio": 0.00716389948502183, + "epoch": 0.08389830508474576, + 
"grad_norm": 0.04924083222576615, + "learning_rate": 1.6779661016949154e-05, + "loss": -0.0158, + "step": 99 + }, + { + "clip_ratio": 0.011036296375095844, + "epoch": 0.0847457627118644, + "grad_norm": 0.04955323333773024, + "learning_rate": 1.694915254237288e-05, + "loss": -0.0163, + "step": 100 + }, + { + "clip_ratio": 0.00038607188616879284, + "completion_length": 507.2500305175781, + "epoch": 0.08559322033898305, + "grad_norm": 0.0, + "learning_rate": 1.711864406779661e-05, + "loss": 0.0, + "num_tokens": 807230.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 101 + }, + { + "clip_ratio": 0.0004233713843859732, + "epoch": 0.08644067796610169, + "grad_norm": 0.0, + "learning_rate": 1.728813559322034e-05, + "loss": 0.0, + "step": 102 + }, + { + "clip_ratio": 0.0005304253427311778, + "epoch": 0.08728813559322034, + "grad_norm": 0.0, + "learning_rate": 1.745762711864407e-05, + "loss": 0.0, + "step": 103 + }, + { + "clip_ratio": 0.0008094432414509356, + "epoch": 0.08813559322033898, + "grad_norm": 0.0, + "learning_rate": 1.76271186440678e-05, + "loss": 0.0, + "step": 104 + }, + { + "clip_ratio": 0.0003136220038868487, + "completion_length": 309.4821472167969, + "epoch": 0.08898305084745763, + "grad_norm": 0.1215376293190595, + "learning_rate": 1.7796610169491526e-05, + "loss": 0.059, + "num_tokens": 830873.0, + "reward": -0.6071428656578064, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.6071428656578064, + "rewards/check_winston_local_func/std": 0.8017837405204773, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 
0.0, + "step": 105 + }, + { + "clip_ratio": 0.005680752452462912, + "epoch": 0.08983050847457627, + "grad_norm": 0.08882976004122672, + "learning_rate": 1.7966101694915256e-05, + "loss": 0.057, + "step": 106 + }, + { + "clip_ratio": 0.013865095563232899, + "epoch": 0.09067796610169492, + "grad_norm": 0.07178187465318808, + "learning_rate": 1.8135593220338986e-05, + "loss": 0.0551, + "step": 107 + }, + { + "clip_ratio": 0.025337526574730873, + "epoch": 0.09152542372881356, + "grad_norm": 0.05889114052835241, + "learning_rate": 1.8305084745762713e-05, + "loss": 0.054, + "step": 108 + }, + { + "clip_ratio": 0.0004973930190317333, + "completion_length": 309.2857360839844, + "epoch": 0.0923728813559322, + "grad_norm": 0.10159993090017184, + "learning_rate": 1.8474576271186443e-05, + "loss": 0.1029, + "num_tokens": 856689.0, + "reward": -0.7500000596046448, + "reward_std": 0.4123912453651428, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 109 + }, + { + "clip_ratio": 0.005504293367266655, + "epoch": 0.09322033898305085, + "grad_norm": 0.09390182129772277, + "learning_rate": 1.864406779661017e-05, + "loss": 0.1017, + "step": 110 + }, + { + "clip_ratio": 0.022907190024852753, + "epoch": 0.0940677966101695, + "grad_norm": 0.08701453983072766, + "learning_rate": 1.88135593220339e-05, + "loss": 0.0999, + "step": 111 + }, + { + "clip_ratio": 0.04514092579483986, + "epoch": 0.09491525423728814, + "grad_norm": 0.08477253768734147, + "learning_rate": 1.898305084745763e-05, + "loss": 0.0987, + "step": 112 + }, + { + "clip_ratio": 0.0005664547788910568, + "completion_length": 434.39288330078125, + "epoch": 0.09576271186440678, + "grad_norm": 0.0, + "learning_rate": 1.9152542372881357e-05, + "loss": 0.0, + "num_tokens": 
888255.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 113 + }, + { + "clip_ratio": 0.0015907255001366138, + "epoch": 0.09661016949152543, + "grad_norm": 0.0, + "learning_rate": 1.9322033898305087e-05, + "loss": 0.0, + "step": 114 + }, + { + "clip_ratio": 0.003365863347426057, + "epoch": 0.09745762711864407, + "grad_norm": 0.0, + "learning_rate": 1.9491525423728814e-05, + "loss": 0.0, + "step": 115 + }, + { + "clip_ratio": 0.006915883626788855, + "epoch": 0.09830508474576272, + "grad_norm": 0.0, + "learning_rate": 1.9661016949152545e-05, + "loss": 0.0, + "step": 116 + }, + { + "clip_ratio": 0.0015928384382277727, + "completion_length": 311.08929443359375, + "epoch": 0.09915254237288136, + "grad_norm": 0.1669528890016949, + "learning_rate": 1.9830508474576275e-05, + "loss": 0.0592, + "num_tokens": 912948.0, + "reward": -0.785714328289032, + "reward_std": 0.28498074412345886, + "rewards/check_winston_local_func/mean": -0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241877675056458, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 117 + }, + { + "clip_ratio": 0.006070761010050774, + "epoch": 0.1, + "grad_norm": 0.15701074253607375, + "learning_rate": 2e-05, + "loss": 0.056, + "step": 118 + }, + { + "clip_ratio": 0.03282368928194046, + "epoch": 0.10084745762711865, + "grad_norm": 0.21942626154682726, + "learning_rate": 2.016949152542373e-05, + "loss": 0.0526, + "step": 119 + }, + { + "clip_ratio": 0.0628986731171608, + "epoch": 0.1016949152542373, + "grad_norm": 0.1568339023062343, + "learning_rate": 
2.033898305084746e-05, + "loss": 0.0497, + "step": 120 + }, + { + "clip_ratio": 0.0003240547957830131, + "completion_length": 490.607177734375, + "epoch": 0.10254237288135593, + "grad_norm": 0.0, + "learning_rate": 2.0508474576271186e-05, + "loss": 0.0, + "num_tokens": 947318.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 121 + }, + { + "clip_ratio": 0.00037375700776465237, + "epoch": 0.10338983050847457, + "grad_norm": 0.0, + "learning_rate": 2.0677966101694916e-05, + "loss": 0.0, + "step": 122 + }, + { + "clip_ratio": 0.0011371899163350463, + "epoch": 0.10423728813559321, + "grad_norm": 0.0, + "learning_rate": 2.084745762711865e-05, + "loss": 0.0, + "step": 123 + }, + { + "clip_ratio": 0.0022452734410762787, + "epoch": 0.10508474576271186, + "grad_norm": 0.0, + "learning_rate": 2.1016949152542376e-05, + "loss": 0.0, + "step": 124 + }, + { + "clip_ratio": 0.004924725275486708, + "completion_length": 324.58929443359375, + "epoch": 0.1059322033898305, + "grad_norm": 0.3997089536055672, + "learning_rate": 2.1186440677966103e-05, + "loss": 0.04, + "num_tokens": 972527.0, + "reward": -0.8214285969734192, + "reward_std": 0.36553531885147095, + "rewards/check_winston_local_func/mean": -0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 125 + }, + { + "clip_ratio": 0.036066196858882904, + "epoch": 0.10677966101694915, + "grad_norm": 0.4003737832223874, + "learning_rate": 2.1355932203389833e-05, + "loss": 0.0371, + "step": 126 + }, + { + "clip_ratio": 
0.06804865598678589, + "epoch": 0.10762711864406779, + "grad_norm": 0.3262616499772286, + "learning_rate": 2.152542372881356e-05, + "loss": 0.0328, + "step": 127 + }, + { + "clip_ratio": 0.08261267095804214, + "epoch": 0.10847457627118644, + "grad_norm": 0.19475445080797668, + "learning_rate": 2.169491525423729e-05, + "loss": 0.0284, + "step": 128 + }, + { + "clip_ratio": 0.00042747953557409346, + "completion_length": 441.6785888671875, + "epoch": 0.10932203389830508, + "grad_norm": 0.07121815374577634, + "learning_rate": 2.1864406779661017e-05, + "loss": 0.0215, + "num_tokens": 1005157.0, + "reward": -0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 129 + }, + { + "clip_ratio": 0.0005996564286760986, + "epoch": 0.11016949152542373, + "grad_norm": 0.07374447574020743, + "learning_rate": 2.2033898305084748e-05, + "loss": 0.021, + "step": 130 + }, + { + "clip_ratio": 0.0070611475966870785, + "epoch": 0.11101694915254237, + "grad_norm": 0.0484843694410488, + "learning_rate": 2.2203389830508474e-05, + "loss": 0.02, + "step": 131 + }, + { + "clip_ratio": 0.02419929951429367, + "epoch": 0.11186440677966102, + "grad_norm": 0.03734227928764934, + "learning_rate": 2.2372881355932205e-05, + "loss": 0.0194, + "step": 132 + }, + { + "clip_ratio": 0.0008097242680378258, + "completion_length": 299.9285888671875, + "epoch": 0.11271186440677966, + "grad_norm": 0.2037296860020652, + "learning_rate": 2.2542372881355935e-05, + "loss": 0.0123, + "num_tokens": 1029577.0, + "reward": -0.4285714626312256, + "reward_std": 0.49777287244796753, + "rewards/check_winston_local_func/mean": -0.4285714328289032, + "rewards/check_winston_local_func/std": 
0.9116845726966858, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 133 + }, + { + "clip_ratio": 0.010970565490424633, + "epoch": 0.1135593220338983, + "grad_norm": 0.15172018259613887, + "learning_rate": 2.2711864406779665e-05, + "loss": 0.0095, + "step": 134 + }, + { + "clip_ratio": 0.027290966361761093, + "epoch": 0.11440677966101695, + "grad_norm": 0.14632003828933562, + "learning_rate": 2.2881355932203392e-05, + "loss": 0.0066, + "step": 135 + }, + { + "clip_ratio": 0.04884405434131622, + "epoch": 0.1152542372881356, + "grad_norm": 0.13010992493757564, + "learning_rate": 2.3050847457627122e-05, + "loss": 0.0037, + "step": 136 + }, + { + "clip_ratio": 0.00016204381245188415, + "completion_length": 397.9821472167969, + "epoch": 0.11610169491525424, + "grad_norm": 0.0819715923540025, + "learning_rate": 2.322033898305085e-05, + "loss": 0.0348, + "num_tokens": 1059368.0, + "reward": -0.7500000596046448, + "reward_std": 0.3499017357826233, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 137 + }, + { + "clip_ratio": 0.0012223825324326754, + "epoch": 0.11694915254237288, + "grad_norm": 0.07970324522981491, + "learning_rate": 2.338983050847458e-05, + "loss": 0.0336, + "step": 138 + }, + { + "clip_ratio": 0.015393489971756935, + "epoch": 0.11779661016949153, + "grad_norm": 0.07570693688371119, + "learning_rate": 2.3559322033898306e-05, + "loss": 0.0321, + "step": 139 + }, + { + "clip_ratio": 0.07253921031951904, + "epoch": 0.11864406779661017, + "grad_norm": 0.05800544884381334, + "learning_rate": 2.3728813559322036e-05, + "loss": 0.0305, + "step": 140 + }, + { 
+ "clip_ratio": 0.00020609110652003437, + "completion_length": 376.3035888671875, + "epoch": 0.11949152542372882, + "grad_norm": 0.16488571125022886, + "learning_rate": 2.3898305084745763e-05, + "loss": -0.0156, + "num_tokens": 1088561.0, + "reward": -0.5, + "reward_std": 0.686587929725647, + "rewards/check_winston_local_func/mean": -0.5, + "rewards/check_winston_local_func/std": 0.8738628625869751, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 141 + }, + { + "clip_ratio": 0.020974619314074516, + "epoch": 0.12033898305084746, + "grad_norm": 0.12033253885509411, + "learning_rate": 2.406779661016949e-05, + "loss": -0.02, + "step": 142 + }, + { + "clip_ratio": 0.14757588505744934, + "epoch": 0.1211864406779661, + "grad_norm": 0.18906094003962706, + "learning_rate": 2.4237288135593224e-05, + "loss": -0.0215, + "step": 143 + }, + { + "clip_ratio": 0.18001240491867065, + "epoch": 0.12203389830508475, + "grad_norm": 0.2094330456679022, + "learning_rate": 2.4406779661016954e-05, + "loss": -0.0238, + "step": 144 + }, + { + "clip_ratio": 0.0010827317601069808, + "completion_length": 216.85714721679688, + "epoch": 0.1228813559322034, + "grad_norm": 0.22593574409537565, + "learning_rate": 2.457627118644068e-05, + "loss": -0.057, + "num_tokens": 1107713.0, + "reward": -0.0357142873108387, + "reward_std": 0.808063805103302, + "rewards/check_winston_local_func/mean": -0.0357142873108387, + "rewards/check_winston_local_func/std": 1.0084062814712524, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 145 + }, + { + "clip_ratio": 0.01685175858438015, + "epoch": 0.12372881355932204, + "grad_norm": 0.21920453847219976, + "learning_rate": 2.474576271186441e-05, + "loss": -0.0622, + 
"step": 146 + }, + { + "clip_ratio": 0.05698274075984955, + "epoch": 0.12457627118644068, + "grad_norm": 0.23790061749019706, + "learning_rate": 2.4915254237288138e-05, + "loss": -0.0672, + "step": 147 + }, + { + "clip_ratio": 0.06983836740255356, + "epoch": 0.12542372881355932, + "grad_norm": 0.19359662720887325, + "learning_rate": 2.5084745762711865e-05, + "loss": -0.0724, + "step": 148 + }, + { + "clip_ratio": 0.0013232758501544595, + "completion_length": 251.96429443359375, + "epoch": 0.12627118644067797, + "grad_norm": 0.27961740628458276, + "learning_rate": 2.5254237288135595e-05, + "loss": 0.06, + "num_tokens": 1129487.0, + "reward": -0.0357142873108387, + "reward_std": 0.9462584257125854, + "rewards/check_winston_local_func/mean": -0.0357142873108387, + "rewards/check_winston_local_func/std": 1.0084062814712524, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 149 + }, + { + "clip_ratio": 0.03364234417676926, + "epoch": 0.1271186440677966, + "grad_norm": 0.19276991014072303, + "learning_rate": 2.5423728813559322e-05, + "loss": 0.054, + "step": 150 + }, + { + "clip_ratio": 0.1430949568748474, + "epoch": 0.12796610169491526, + "grad_norm": 0.2768368269508983, + "learning_rate": 2.5593220338983052e-05, + "loss": 0.0518, + "step": 151 + }, + { + "clip_ratio": 0.16415317356586456, + "epoch": 0.1288135593220339, + "grad_norm": 0.25743304440606246, + "learning_rate": 2.576271186440678e-05, + "loss": 0.0475, + "step": 152 + }, + { + "clip_ratio": 0.0013469145633280277, + "completion_length": 204.48214721679688, + "epoch": 0.12966101694915255, + "grad_norm": 0.28188012404317475, + "learning_rate": 2.5932203389830512e-05, + "loss": 0.0527, + "num_tokens": 1148354.0, + "reward": 0.1428571492433548, + "reward_std": 0.7129831910133362, + "rewards/check_winston_local_func/mean": 0.1428571492433548, + 
"rewards/check_winston_local_func/std": 0.9987004995346069, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 153 + }, + { + "clip_ratio": 0.016695290803909302, + "epoch": 0.13050847457627118, + "grad_norm": 0.2641379759457116, + "learning_rate": 2.610169491525424e-05, + "loss": 0.0473, + "step": 154 + }, + { + "clip_ratio": 0.05237039551138878, + "epoch": 0.13135593220338984, + "grad_norm": 0.20691108630731772, + "learning_rate": 2.627118644067797e-05, + "loss": 0.0414, + "step": 155 + }, + { + "clip_ratio": 0.0867982804775238, + "epoch": 0.13220338983050847, + "grad_norm": 0.15341544674011254, + "learning_rate": 2.6440677966101696e-05, + "loss": 0.0351, + "step": 156 + }, + { + "clip_ratio": 0.0006545564392581582, + "completion_length": 233.9285888671875, + "epoch": 0.13305084745762713, + "grad_norm": 0.16036976523795443, + "learning_rate": 2.6610169491525427e-05, + "loss": 0.0179, + "num_tokens": 1168622.0, + "reward": 0.7142857313156128, + "reward_std": 0.4016071856021881, + "rewards/check_winston_local_func/mean": 0.7142857313156128, + "rewards/check_winston_local_func/std": 0.7061878442764282, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 157 + }, + { + "clip_ratio": 0.00993060227483511, + "epoch": 0.13389830508474576, + "grad_norm": 0.1298083776077636, + "learning_rate": 2.6779661016949153e-05, + "loss": 0.0151, + "step": 158 + }, + { + "clip_ratio": 0.0733163133263588, + "epoch": 0.13474576271186442, + "grad_norm": 0.11590218855503849, + "learning_rate": 2.6949152542372884e-05, + "loss": 0.0125, + "step": 159 + }, + { + "clip_ratio": 0.14935636520385742, + "epoch": 0.13559322033898305, + "grad_norm": 0.16154268567658825, + "learning_rate": 
2.711864406779661e-05, + "loss": 0.011, + "step": 160 + }, + { + "clip_ratio": 0.0009650280699133873, + "completion_length": 174.7678680419922, + "epoch": 0.13644067796610168, + "grad_norm": 0.15404950919743313, + "learning_rate": 2.728813559322034e-05, + "loss": 0.0078, + "num_tokens": 1185697.0, + "reward": 0.8571429252624512, + "reward_std": 0.3342905640602112, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 161 + }, + { + "clip_ratio": 0.004791476763784885, + "epoch": 0.13728813559322034, + "grad_norm": 0.12682344230599282, + "learning_rate": 2.7457627118644068e-05, + "loss": 0.0056, + "step": 162 + }, + { + "clip_ratio": 0.023417560383677483, + "epoch": 0.13813559322033897, + "grad_norm": 0.0948693079603576, + "learning_rate": 2.76271186440678e-05, + "loss": 0.003, + "step": 163 + }, + { + "clip_ratio": 0.07911951839923859, + "epoch": 0.13898305084745763, + "grad_norm": 0.09089932231497586, + "learning_rate": 2.7796610169491528e-05, + "loss": 0.0007, + "step": 164 + }, + { + "clip_ratio": 0.000979878008365631, + "completion_length": 126.64286041259766, + "epoch": 0.13983050847457626, + "grad_norm": 0.1801163708005843, + "learning_rate": 2.7966101694915258e-05, + "loss": -0.0396, + "num_tokens": 1199565.0, + "reward": 0.7500000596046448, + "reward_std": 0.3859959840774536, + "rewards/check_winston_local_func/mean": 0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 165 + }, + { + "clip_ratio": 0.009913308545947075, + "epoch": 0.14067796610169492, + "grad_norm": 
0.14588220837158195, + "learning_rate": 2.8135593220338985e-05, + "loss": -0.0428, + "step": 166 + }, + { + "clip_ratio": 0.07110879570245743, + "epoch": 0.14152542372881355, + "grad_norm": 0.276973278154756, + "learning_rate": 2.8305084745762715e-05, + "loss": -0.0441, + "step": 167 + }, + { + "clip_ratio": 0.06909574568271637, + "epoch": 0.1423728813559322, + "grad_norm": 0.12488402451050255, + "learning_rate": 2.8474576271186442e-05, + "loss": -0.0494, + "step": 168 + }, + { + "clip_ratio": 0.0003819709818344563, + "completion_length": 152.73214721679688, + "epoch": 0.14322033898305084, + "grad_norm": 0.3195642927880649, + "learning_rate": 2.8644067796610172e-05, + "loss": 0.0302, + "num_tokens": 1214790.0, + "reward": 0.7142857313156128, + "reward_std": 0.3342905342578888, + "rewards/check_winston_local_func/mean": 0.7142857313156128, + "rewards/check_winston_local_func/std": 0.7061878442764282, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 169 + }, + { + "clip_ratio": 0.015475978143513203, + "epoch": 0.1440677966101695, + "grad_norm": 0.2360393211362849, + "learning_rate": 2.88135593220339e-05, + "loss": 0.0228, + "step": 170 + }, + { + "clip_ratio": 0.08493895828723907, + "epoch": 0.14491525423728813, + "grad_norm": 0.17350104363138513, + "learning_rate": 2.8983050847457626e-05, + "loss": 0.0163, + "step": 171 + }, + { + "clip_ratio": 0.14768318831920624, + "epoch": 0.14576271186440679, + "grad_norm": 0.19569281232532856, + "learning_rate": 2.9152542372881356e-05, + "loss": 0.013, + "step": 172 + }, + { + "clip_ratio": 0.006150017958134413, + "completion_length": 186.25001525878906, + "epoch": 0.14661016949152542, + "grad_norm": 0.06068449124289285, + "learning_rate": 2.932203389830509e-05, + "loss": -0.0169, + "num_tokens": 1232564.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + 
"rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 173 + }, + { + "clip_ratio": 0.014521388337016106, + "epoch": 0.14745762711864407, + "grad_norm": 0.05994459441740582, + "learning_rate": 2.9491525423728817e-05, + "loss": -0.0174, + "step": 174 + }, + { + "clip_ratio": 0.04354570060968399, + "epoch": 0.1483050847457627, + "grad_norm": 0.06278027199945566, + "learning_rate": 2.9661016949152547e-05, + "loss": -0.0183, + "step": 175 + }, + { + "clip_ratio": 0.10504651814699173, + "epoch": 0.14915254237288136, + "grad_norm": 0.04416483226500781, + "learning_rate": 2.9830508474576274e-05, + "loss": -0.0193, + "step": 176 + }, + { + "clip_ratio": 0.003162125591188669, + "completion_length": 163.17857360839844, + "epoch": 0.15, + "grad_norm": 0.11359153510598317, + "learning_rate": 3.0000000000000004e-05, + "loss": -0.0335, + "num_tokens": 1248550.0, + "reward": 0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 177 + }, + { + "clip_ratio": 0.01111722644418478, + "epoch": 0.15084745762711865, + "grad_norm": 0.10416258904679447, + "learning_rate": 3.016949152542373e-05, + "loss": -0.0347, + "step": 178 + }, + { + "clip_ratio": 0.04117439687252045, + "epoch": 0.15169491525423728, + "grad_norm": 0.08204255975558637, + "learning_rate": 3.0338983050847458e-05, + "loss": -0.0364, + "step": 179 + }, + { + "clip_ratio": 0.08657827973365784, + "epoch": 0.15254237288135594, + "grad_norm": 
0.08178448057500348, + "learning_rate": 3.0508474576271188e-05, + "loss": -0.038, + "step": 180 + }, + { + "clip_ratio": 0.010199248790740967, + "completion_length": 162.35714721679688, + "epoch": 0.15338983050847457, + "grad_norm": 0.4819050859019718, + "learning_rate": 3.067796610169492e-05, + "loss": 0.06, + "num_tokens": 1264994.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 181 + }, + { + "clip_ratio": 0.060436759144067764, + "epoch": 0.15423728813559323, + "grad_norm": 0.1959898799735129, + "learning_rate": 3.084745762711865e-05, + "loss": 0.0533, + "step": 182 + }, + { + "clip_ratio": 0.13463598489761353, + "epoch": 0.15508474576271186, + "grad_norm": 0.12678282333898375, + "learning_rate": 3.101694915254238e-05, + "loss": 0.0482, + "step": 183 + }, + { + "clip_ratio": 0.19176946580410004, + "epoch": 0.15593220338983052, + "grad_norm": 0.10756609820315277, + "learning_rate": 3.1186440677966106e-05, + "loss": 0.0463, + "step": 184 + }, + { + "clip_ratio": 0.0008241009199991822, + "completion_length": 237.60714721679688, + "epoch": 0.15677966101694915, + "grad_norm": 0.0, + "learning_rate": 3.135593220338983e-05, + "loss": 0.0, + "num_tokens": 1286164.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 185 + }, + { + "clip_ratio": 0.002994579030200839, + "epoch": 0.1576271186440678, + "grad_norm": 0.0, + "learning_rate": 
3.152542372881356e-05, + "loss": 0.0, + "step": 186 + }, + { + "clip_ratio": 0.00574068445712328, + "epoch": 0.15847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.169491525423729e-05, + "loss": 0.0, + "step": 187 + }, + { + "clip_ratio": 0.012791804037988186, + "epoch": 0.15932203389830507, + "grad_norm": 0.0, + "learning_rate": 3.186440677966102e-05, + "loss": 0.0, + "step": 188 + }, + { + "clip_ratio": 0.006764067802578211, + "completion_length": 143.94644165039062, + "epoch": 0.16016949152542373, + "grad_norm": 0.04704135237627796, + "learning_rate": 3.203389830508475e-05, + "loss": -0.0095, + "num_tokens": 1301409.0, + "reward": 0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 189 + }, + { + "clip_ratio": 0.013139193877577782, + "epoch": 0.16101694915254236, + "grad_norm": 0.04532372769932697, + "learning_rate": 3.2203389830508473e-05, + "loss": -0.0098, + "step": 190 + }, + { + "clip_ratio": 0.03423069044947624, + "epoch": 0.16186440677966102, + "grad_norm": 0.040646403971755785, + "learning_rate": 3.237288135593221e-05, + "loss": -0.0105, + "step": 191 + }, + { + "clip_ratio": 0.06455554068088531, + "epoch": 0.16271186440677965, + "grad_norm": 0.03643001220928061, + "learning_rate": 3.2542372881355934e-05, + "loss": -0.0113, + "step": 192 + }, + { + "clip_ratio": 0.0007823093910701573, + "completion_length": 229.9107208251953, + "epoch": 0.1635593220338983, + "grad_norm": 0.0, + "learning_rate": 3.271186440677967e-05, + "loss": 0.0, + "num_tokens": 1321708.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 193 + }, + { + "clip_ratio": 0.0008988279732875526, + "epoch": 0.16440677966101694, + "grad_norm": 0.0, + "learning_rate": 3.2881355932203394e-05, + "loss": 0.0, + "step": 194 + }, + { + "clip_ratio": 0.003465626621618867, + "epoch": 0.1652542372881356, + "grad_norm": 0.0, + "learning_rate": 3.305084745762712e-05, + "loss": 0.0, + "step": 195 + }, + { + "clip_ratio": 0.008655412122607231, + "epoch": 0.16610169491525423, + "grad_norm": 0.0, + "learning_rate": 3.322033898305085e-05, + "loss": 0.0, + "step": 196 + }, + { + "clip_ratio": 0.0021059864666312933, + "completion_length": 166.85714721679688, + "epoch": 0.1669491525423729, + "grad_norm": 0.17307734331449404, + "learning_rate": 3.338983050847458e-05, + "loss": -0.0036, + "num_tokens": 1338540.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 197 + }, + { + "clip_ratio": 0.009274979121983051, + "epoch": 0.16779661016949152, + "grad_norm": 0.10860266060182006, + "learning_rate": 3.355932203389831e-05, + "loss": -0.0066, + "step": 198 + }, + { + "clip_ratio": 0.03715561330318451, + "epoch": 0.16864406779661018, + "grad_norm": 0.09136703784102146, + "learning_rate": 3.3728813559322035e-05, + "loss": -0.008, + "step": 199 + }, + { + "clip_ratio": 0.06759678572416306, + "epoch": 0.1694915254237288, + "grad_norm": 0.08121070179066665, + "learning_rate": 3.389830508474576e-05, + "loss": -0.009, + "step": 200 + }, + { + "clip_ratio": 0.0002369106950936839, + "completion_length": 
132.625, + "epoch": 0.17033898305084746, + "grad_norm": 0.12222790896016958, + "learning_rate": 3.406779661016949e-05, + "loss": 0.0931, + "num_tokens": 1352735.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 201 + }, + { + "clip_ratio": 0.0015077884308993816, + "epoch": 0.1711864406779661, + "grad_norm": 0.12028738542362348, + "learning_rate": 3.423728813559322e-05, + "loss": 0.0915, + "step": 202 + }, + { + "clip_ratio": 0.011990153230726719, + "epoch": 0.17203389830508475, + "grad_norm": 0.10639183565121645, + "learning_rate": 3.4406779661016956e-05, + "loss": 0.0873, + "step": 203 + }, + { + "clip_ratio": 0.05813857913017273, + "epoch": 0.17288135593220338, + "grad_norm": 0.08983262526351615, + "learning_rate": 3.457627118644068e-05, + "loss": 0.0833, + "step": 204 + }, + { + "clip_ratio": 0.001714512356556952, + "completion_length": 90.37500762939453, + "epoch": 0.17372881355932204, + "grad_norm": 0.10843637606790192, + "learning_rate": 3.474576271186441e-05, + "loss": 0.0164, + "num_tokens": 1365892.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 205 + }, + { + "clip_ratio": 0.017641481012105942, + "epoch": 0.17457627118644067, + "grad_norm": 0.08303991648667351, + "learning_rate": 3.491525423728814e-05, + "loss": 0.0148, + "step": 206 + }, + { + "clip_ratio": 
0.15765391290187836, + "epoch": 0.17542372881355933, + "grad_norm": 0.07279655924549996, + "learning_rate": 3.5084745762711864e-05, + "loss": 0.0138, + "step": 207 + }, + { + "clip_ratio": 0.2804856598377228, + "epoch": 0.17627118644067796, + "grad_norm": 0.09315271598947107, + "learning_rate": 3.52542372881356e-05, + "loss": 0.0135, + "step": 208 + }, + { + "clip_ratio": 0.003159541869536042, + "completion_length": 67.30357360839844, + "epoch": 0.17711864406779662, + "grad_norm": 0.0, + "learning_rate": 3.5423728813559324e-05, + "loss": 0.0, + "num_tokens": 1376973.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 209 + }, + { + "clip_ratio": 0.004362096078693867, + "epoch": 0.17796610169491525, + "grad_norm": 0.0, + "learning_rate": 3.559322033898305e-05, + "loss": 0.0, + "step": 210 + }, + { + "clip_ratio": 0.01770210638642311, + "epoch": 0.1788135593220339, + "grad_norm": 0.0, + "learning_rate": 3.576271186440678e-05, + "loss": 0.0, + "step": 211 + }, + { + "clip_ratio": 0.035751208662986755, + "epoch": 0.17966101694915254, + "grad_norm": 0.0, + "learning_rate": 3.593220338983051e-05, + "loss": 0.0, + "step": 212 + }, + { + "clip_ratio": 0.0035622839350253344, + "completion_length": 65.08928680419922, + "epoch": 0.1805084745762712, + "grad_norm": 0.0, + "learning_rate": 3.610169491525424e-05, + "loss": 0.0, + "num_tokens": 1387570.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 213 + }, + { + 
"clip_ratio": 0.0025523039512336254, + "epoch": 0.18135593220338983, + "grad_norm": 0.0, + "learning_rate": 3.627118644067797e-05, + "loss": 0.0, + "step": 214 + }, + { + "clip_ratio": 0.005835308227688074, + "epoch": 0.18220338983050846, + "grad_norm": 0.0, + "learning_rate": 3.64406779661017e-05, + "loss": 0.0, + "step": 215 + }, + { + "clip_ratio": 0.011904297396540642, + "epoch": 0.18305084745762712, + "grad_norm": 0.0, + "learning_rate": 3.6610169491525426e-05, + "loss": 0.0, + "step": 216 + }, + { + "clip_ratio": 0.0003092146071139723, + "completion_length": 51.10714340209961, + "epoch": 0.18389830508474575, + "grad_norm": 0.356952256149441, + "learning_rate": 3.677966101694915e-05, + "loss": -0.0154, + "num_tokens": 1397320.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 217 + }, + { + "clip_ratio": 0.11051050573587418, + "epoch": 0.1847457627118644, + "grad_norm": 0.1542677635762948, + "learning_rate": 3.6949152542372886e-05, + "loss": -0.019, + "step": 218 + }, + { + "clip_ratio": 0.18382969498634338, + "epoch": 0.18559322033898304, + "grad_norm": 0.13966519767464722, + "learning_rate": 3.711864406779661e-05, + "loss": -0.0211, + "step": 219 + }, + { + "clip_ratio": 0.27009809017181396, + "epoch": 0.1864406779661017, + "grad_norm": 0.08132731257822706, + "learning_rate": 3.728813559322034e-05, + "loss": -0.0238, + "step": 220 + }, + { + "clip_ratio": 0.006028716918081045, + "completion_length": 83.26786041259766, + "epoch": 0.18728813559322033, + "grad_norm": 0.0, + "learning_rate": 3.745762711864407e-05, + "loss": 0.0, + "num_tokens": 1409935.0, + "reward": 1.0, + "reward_std": 0.0, + 
"rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 221 + }, + { + "clip_ratio": 0.021660711616277695, + "epoch": 0.188135593220339, + "grad_norm": 0.0, + "learning_rate": 3.76271186440678e-05, + "loss": 0.0, + "step": 222 + }, + { + "clip_ratio": 0.06699295341968536, + "epoch": 0.18898305084745762, + "grad_norm": 0.0, + "learning_rate": 3.779661016949153e-05, + "loss": 0.0, + "step": 223 + }, + { + "clip_ratio": 0.1347362995147705, + "epoch": 0.18983050847457628, + "grad_norm": 0.0, + "learning_rate": 3.796610169491526e-05, + "loss": 0.0, + "step": 224 + }, + { + "clip_ratio": 0.0006836191168986261, + "completion_length": 97.92857360839844, + "epoch": 0.1906779661016949, + "grad_norm": 0.05873233342660551, + "learning_rate": 3.813559322033899e-05, + "loss": 0.0388, + "num_tokens": 1422307.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 225 + }, + { + "clip_ratio": 0.0010315729305148125, + "epoch": 0.19152542372881357, + "grad_norm": 0.05937392738616397, + "learning_rate": 3.8305084745762714e-05, + "loss": 0.0386, + "step": 226 + }, + { + "clip_ratio": 0.01008252426981926, + "epoch": 0.1923728813559322, + "grad_norm": 0.0544038037479039, + "learning_rate": 3.847457627118644e-05, + "loss": 0.0371, + "step": 227 + }, + { + "clip_ratio": 0.0420512929558754, + "epoch": 0.19322033898305085, + "grad_norm": 0.047388133840752925, + "learning_rate": 3.8644067796610175e-05, + "loss": 0.0356, + 
"step": 228 + }, + { + "clip_ratio": 0.003829076187685132, + "completion_length": 48.48214340209961, + "epoch": 0.19406779661016949, + "grad_norm": 0.24338559301731436, + "learning_rate": 3.88135593220339e-05, + "loss": -0.0201, + "num_tokens": 1432182.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 229 + }, + { + "clip_ratio": 0.041657134890556335, + "epoch": 0.19491525423728814, + "grad_norm": 0.15635724094524717, + "learning_rate": 3.898305084745763e-05, + "loss": -0.026, + "step": 230 + }, + { + "clip_ratio": 0.16935327649116516, + "epoch": 0.19576271186440677, + "grad_norm": 0.11486942308015832, + "learning_rate": 3.9152542372881355e-05, + "loss": -0.0295, + "step": 231 + }, + { + "clip_ratio": 0.22958868741989136, + "epoch": 0.19661016949152543, + "grad_norm": 0.10892713241904037, + "learning_rate": 3.932203389830509e-05, + "loss": -0.0314, + "step": 232 + }, + { + "clip_ratio": 0.003605353645980358, + "completion_length": 66.10714721679688, + "epoch": 0.19745762711864406, + "grad_norm": 0.24973476558992524, + "learning_rate": 3.9491525423728816e-05, + "loss": -0.0118, + "num_tokens": 1443140.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 233 + }, + { + "clip_ratio": 0.029195427894592285, + "epoch": 0.19830508474576272, + "grad_norm": 0.1595699714332021, + "learning_rate": 
3.966101694915255e-05, + "loss": -0.0189, + "step": 234 + }, + { + "clip_ratio": 0.10283487290143967, + "epoch": 0.19915254237288135, + "grad_norm": 0.11474727019285232, + "learning_rate": 3.9830508474576276e-05, + "loss": -0.0247, + "step": 235 + }, + { + "clip_ratio": 0.15862122178077698, + "epoch": 0.2, + "grad_norm": 0.10043744347803148, + "learning_rate": 4e-05, + "loss": -0.029, + "step": 236 + }, + { + "clip_ratio": 0.0013605443527922034, + "completion_length": 74.55357360839844, + "epoch": 0.20084745762711864, + "grad_norm": 0.13987954732136554, + "learning_rate": 3.9981167608286254e-05, + "loss": -0.0433, + "num_tokens": 1454515.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 237 + }, + { + "clip_ratio": 0.010904515162110329, + "epoch": 0.2016949152542373, + "grad_norm": 0.10644135758363778, + "learning_rate": 3.9962335216572505e-05, + "loss": -0.0464, + "step": 238 + }, + { + "clip_ratio": 0.05173995718359947, + "epoch": 0.20254237288135593, + "grad_norm": 0.08987160994189367, + "learning_rate": 3.994350282485876e-05, + "loss": -0.0494, + "step": 239 + }, + { + "clip_ratio": 0.10260221362113953, + "epoch": 0.2033898305084746, + "grad_norm": 0.07087528775663905, + "learning_rate": 3.9924670433145014e-05, + "loss": -0.0523, + "step": 240 + }, + { + "clip_ratio": 0.0006479613948613405, + "completion_length": 73.28572082519531, + "epoch": 0.20423728813559322, + "grad_norm": 0.3445626306759668, + "learning_rate": 3.9905838041431265e-05, + "loss": 0.0365, + "num_tokens": 1466203.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + 
"rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 241 + }, + { + "clip_ratio": 0.061018019914627075, + "epoch": 0.20508474576271185, + "grad_norm": 0.17586034661375718, + "learning_rate": 3.9887005649717516e-05, + "loss": 0.0285, + "step": 242 + }, + { + "clip_ratio": 0.21443673968315125, + "epoch": 0.2059322033898305, + "grad_norm": 0.16852265377216533, + "learning_rate": 3.986817325800377e-05, + "loss": 0.0245, + "step": 243 + }, + { + "clip_ratio": 0.3032749891281128, + "epoch": 0.20677966101694914, + "grad_norm": 0.16538030606379006, + "learning_rate": 3.984934086629002e-05, + "loss": 0.0219, + "step": 244 + }, + { + "clip_ratio": 0.001159251551143825, + "completion_length": 80.375, + "epoch": 0.2076271186440678, + "grad_norm": 0.17416776142167675, + "learning_rate": 3.9830508474576276e-05, + "loss": -0.0293, + "num_tokens": 1477712.0, + "reward": 0.8928571939468384, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 245 + }, + { + "clip_ratio": 0.018606197088956833, + "epoch": 0.20847457627118643, + "grad_norm": 0.13288705840160372, + "learning_rate": 3.981167608286253e-05, + "loss": -0.0342, + "step": 246 + }, + { + "clip_ratio": 0.07409250736236572, + "epoch": 0.2093220338983051, + "grad_norm": 0.11062194988477761, + "learning_rate": 3.979284369114878e-05, + "loss": -0.0387, + "step": 247 + }, + { + "clip_ratio": 0.13684464991092682, + "epoch": 0.21016949152542372, + "grad_norm": 0.09630587836377022, + "learning_rate": 3.9774011299435036e-05, + 
"loss": -0.0423, + "step": 248 + }, + { + "clip_ratio": 0.0009059179574251175, + "completion_length": 55.892860412597656, + "epoch": 0.21101694915254238, + "grad_norm": 0.30277389882138056, + "learning_rate": 3.975517890772128e-05, + "loss": -0.0232, + "num_tokens": 1489946.0, + "reward": 0.8214285969734192, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": 0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 249 + }, + { + "clip_ratio": 0.05273974686861038, + "epoch": 0.211864406779661, + "grad_norm": 0.20551737732816863, + "learning_rate": 3.973634651600754e-05, + "loss": -0.0352, + "step": 250 + }, + { + "clip_ratio": 0.15495876967906952, + "epoch": 0.21271186440677967, + "grad_norm": 0.23192855972985502, + "learning_rate": 3.971751412429379e-05, + "loss": -0.0428, + "step": 251 + }, + { + "clip_ratio": 0.17651182413101196, + "epoch": 0.2135593220338983, + "grad_norm": 0.153802982923592, + "learning_rate": 3.969868173258004e-05, + "loss": -0.0503, + "step": 252 + }, + { + "clip_ratio": 0.001365313190035522, + "completion_length": 74.5, + "epoch": 0.21440677966101696, + "grad_norm": 0.1294886128912843, + "learning_rate": 3.967984934086629e-05, + "loss": -0.0165, + "num_tokens": 1501150.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 253 + }, + { + "clip_ratio": 0.029109954833984375, + "epoch": 0.21525423728813559, + "grad_norm": 0.07796443960667383, + "learning_rate": 
3.966101694915255e-05, + "loss": -0.0187, + "step": 254 + }, + { + "clip_ratio": 0.1373310536146164, + "epoch": 0.21610169491525424, + "grad_norm": 0.0719203190228422, + "learning_rate": 3.9642184557438794e-05, + "loss": -0.0211, + "step": 255 + }, + { + "clip_ratio": 0.24434244632720947, + "epoch": 0.21694915254237288, + "grad_norm": 0.0766668656235949, + "learning_rate": 3.962335216572505e-05, + "loss": -0.023, + "step": 256 + }, + { + "clip_ratio": 0.0014079277170822024, + "completion_length": 76.76786041259766, + "epoch": 0.21779661016949153, + "grad_norm": 0.14896557612902658, + "learning_rate": 3.96045197740113e-05, + "loss": -0.048, + "num_tokens": 1513457.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 257 + }, + { + "clip_ratio": 0.008702627383172512, + "epoch": 0.21864406779661016, + "grad_norm": 0.12790944068712845, + "learning_rate": 3.9585687382297554e-05, + "loss": -0.0515, + "step": 258 + }, + { + "clip_ratio": 0.05537901073694229, + "epoch": 0.21949152542372882, + "grad_norm": 0.0901106115692995, + "learning_rate": 3.956685499058381e-05, + "loss": -0.0559, + "step": 259 + }, + { + "clip_ratio": 0.12627661228179932, + "epoch": 0.22033898305084745, + "grad_norm": 0.08842019141814955, + "learning_rate": 3.954802259887006e-05, + "loss": -0.0589, + "step": 260 + }, + { + "clip_ratio": 0.001680672401562333, + "completion_length": 68.46428680419922, + "epoch": 0.2211864406779661, + "grad_norm": 0.45455295411779023, + "learning_rate": 3.9529190207156314e-05, + "loss": 0.0253, + "num_tokens": 1523675.0, + "reward": 0.785714328289032, + "reward_std": 0.35475122928619385, + "rewards/check_winston_local_func/mean": 
0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241878271102905, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 261 + }, + { + "clip_ratio": 0.08457090705633163, + "epoch": 0.22203389830508474, + "grad_norm": 0.2750192097608069, + "learning_rate": 3.9510357815442565e-05, + "loss": 0.0122, + "step": 262 + }, + { + "clip_ratio": 0.14618617296218872, + "epoch": 0.2228813559322034, + "grad_norm": 0.21776056884947845, + "learning_rate": 3.9491525423728816e-05, + "loss": 0.0035, + "step": 263 + }, + { + "clip_ratio": 0.16797243058681488, + "epoch": 0.22372881355932203, + "grad_norm": 0.1552963639704198, + "learning_rate": 3.947269303201507e-05, + "loss": -0.0027, + "step": 264 + }, + { + "clip_ratio": 0.0041149333119392395, + "completion_length": 92.05357360839844, + "epoch": 0.2245762711864407, + "grad_norm": 0.0, + "learning_rate": 3.9453860640301325e-05, + "loss": 0.0, + "num_tokens": 1537926.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 265 + }, + { + "clip_ratio": 0.015582824125885963, + "epoch": 0.22542372881355932, + "grad_norm": 0.0, + "learning_rate": 3.943502824858757e-05, + "loss": 0.0, + "step": 266 + }, + { + "clip_ratio": 0.05549276992678642, + "epoch": 0.22627118644067798, + "grad_norm": 0.0, + "learning_rate": 3.941619585687383e-05, + "loss": 0.0, + "step": 267 + }, + { + "clip_ratio": 0.10348478704690933, + "epoch": 0.2271186440677966, + "grad_norm": 0.0, + "learning_rate": 3.939736346516008e-05, + "loss": 0.0, + "step": 268 + }, + { + "clip_ratio": 0.0024110758677124977, + "completion_length": 
132.5357208251953, + "epoch": 0.22796610169491524, + "grad_norm": 0.09866009376925343, + "learning_rate": 3.937853107344633e-05, + "loss": -0.0252, + "num_tokens": 1552892.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 269 + }, + { + "clip_ratio": 0.009020349942147732, + "epoch": 0.2288135593220339, + "grad_norm": 0.08611769992817986, + "learning_rate": 3.935969868173259e-05, + "loss": -0.0267, + "step": 270 + }, + { + "clip_ratio": 0.03130246326327324, + "epoch": 0.22966101694915253, + "grad_norm": 0.08283957691220468, + "learning_rate": 3.934086629001884e-05, + "loss": -0.0288, + "step": 271 + }, + { + "clip_ratio": 0.06420135498046875, + "epoch": 0.2305084745762712, + "grad_norm": 0.07319871503015539, + "learning_rate": 3.932203389830509e-05, + "loss": -0.0314, + "step": 272 + }, + { + "clip_ratio": 0.0030047716572880745, + "completion_length": 85.9464340209961, + "epoch": 0.23135593220338982, + "grad_norm": 0.42479217252605955, + "learning_rate": 3.930320150659134e-05, + "loss": 0.0151, + "num_tokens": 1564737.0, + "reward": 0.8571429252624512, + "reward_std": 0.3342905342578888, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 273 + }, + { + "clip_ratio": 0.046567775309085846, + "epoch": 0.23220338983050848, + "grad_norm": 0.25875493543994865, + "learning_rate": 3.928436911487759e-05, + "loss": 0.0039, + "step": 274 + }, + { + "clip_ratio": 
0.11683137714862823, + "epoch": 0.2330508474576271, + "grad_norm": 0.15568587648106266, + "learning_rate": 3.926553672316384e-05, + "loss": -0.0039, + "step": 275 + }, + { + "clip_ratio": 0.1598564237356186, + "epoch": 0.23389830508474577, + "grad_norm": 0.12141989924883649, + "learning_rate": 3.92467043314501e-05, + "loss": -0.0076, + "step": 276 + }, + { + "clip_ratio": 0.001374253653921187, + "completion_length": 122.96429443359375, + "epoch": 0.2347457627118644, + "grad_norm": 0.18782615643703865, + "learning_rate": 3.922787193973635e-05, + "loss": -0.026, + "num_tokens": 1579727.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 277 + }, + { + "clip_ratio": 0.017132315784692764, + "epoch": 0.23559322033898306, + "grad_norm": 0.12401604739808049, + "learning_rate": 3.92090395480226e-05, + "loss": -0.031, + "step": 278 + }, + { + "clip_ratio": 0.05705662816762924, + "epoch": 0.2364406779661017, + "grad_norm": 0.0762353013620226, + "learning_rate": 3.919020715630885e-05, + "loss": -0.034, + "step": 279 + }, + { + "clip_ratio": 0.09824671596288681, + "epoch": 0.23728813559322035, + "grad_norm": 0.07414316824181627, + "learning_rate": 3.9171374764595104e-05, + "loss": -0.0361, + "step": 280 + }, + { + "clip_ratio": 0.006891847122460604, + "completion_length": 111.14286041259766, + "epoch": 0.23813559322033898, + "grad_norm": 0.44887370177976565, + "learning_rate": 3.9152542372881355e-05, + "loss": -0.0147, + "num_tokens": 1592855.0, + "reward": 0.7500000596046448, + "reward_std": 0.637336254119873, + "rewards/check_winston_local_func/mean": 0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 281 + }, + { + "clip_ratio": 0.0862836092710495, + "epoch": 0.23898305084745763, + "grad_norm": 0.2905997622704694, + "learning_rate": 3.913370998116761e-05, + "loss": -0.0255, + "step": 282 + }, + { + "clip_ratio": 0.13213881850242615, + "epoch": 0.23983050847457626, + "grad_norm": 0.20046065755709935, + "learning_rate": 3.9114877589453864e-05, + "loss": -0.036, + "step": 283 + }, + { + "clip_ratio": 0.20514217019081116, + "epoch": 0.24067796610169492, + "grad_norm": 0.17822083347245274, + "learning_rate": 3.9096045197740115e-05, + "loss": -0.0417, + "step": 284 + }, + { + "clip_ratio": 0.0007382220355793834, + "completion_length": 90.14286041259766, + "epoch": 0.24152542372881355, + "grad_norm": 0.05844943977633127, + "learning_rate": 3.907721280602637e-05, + "loss": -0.0145, + "num_tokens": 1605287.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 285 + }, + { + "clip_ratio": 0.006259975954890251, + "epoch": 0.2423728813559322, + "grad_norm": 0.04883518588447698, + "learning_rate": 3.905838041431262e-05, + "loss": -0.015, + "step": 286 + }, + { + "clip_ratio": 0.023042459040880203, + "epoch": 0.24322033898305084, + "grad_norm": 0.04225419018938037, + "learning_rate": 3.9039548022598875e-05, + "loss": -0.0156, + "step": 287 + }, + { + "clip_ratio": 0.04386242851614952, + "epoch": 0.2440677966101695, + "grad_norm": 0.03866602121110847, + "learning_rate": 3.9020715630885127e-05, + "loss": -0.0163, + "step": 288 + }, + { + 
"clip_ratio": 0.0029555519577115774, + "completion_length": 144.98214721679688, + "epoch": 0.24491525423728813, + "grad_norm": 0.1262690850004453, + "learning_rate": 3.900188323917138e-05, + "loss": -0.0343, + "num_tokens": 1620742.0, + "reward": 0.8571429252624512, + "reward_std": 0.4040610194206238, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 289 + }, + { + "clip_ratio": 0.0132750254124403, + "epoch": 0.2457627118644068, + "grad_norm": 0.10191416605166659, + "learning_rate": 3.898305084745763e-05, + "loss": -0.0377, + "step": 290 + }, + { + "clip_ratio": 0.043000176548957825, + "epoch": 0.24661016949152542, + "grad_norm": 0.08414775760035112, + "learning_rate": 3.896421845574388e-05, + "loss": -0.0405, + "step": 291 + }, + { + "clip_ratio": 0.07677298784255981, + "epoch": 0.24745762711864408, + "grad_norm": 0.07673330413564883, + "learning_rate": 3.894538606403013e-05, + "loss": -0.0436, + "step": 292 + }, + { + "clip_ratio": 0.0007677033427171409, + "completion_length": 314.64288330078125, + "epoch": 0.2483050847457627, + "grad_norm": 0.05727067166697756, + "learning_rate": 3.892655367231639e-05, + "loss": 0.0061, + "num_tokens": 1645538.0, + "reward": 0.8571429252624512, + "reward_std": 0.15272071957588196, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 293 + }, + { + "clip_ratio": 0.000989561784081161, + "epoch": 0.24915254237288137, + "grad_norm": 0.05658381403070837, + "learning_rate": 3.890772128060264e-05, + 
"loss": 0.0055, + "step": 294 + }, + { + "clip_ratio": 0.002105026040226221, + "epoch": 0.25, + "grad_norm": 0.05358318750720369, + "learning_rate": 3.888888888888889e-05, + "loss": 0.0045, + "step": 295 + }, + { + "clip_ratio": 0.008737047202885151, + "epoch": 0.25084745762711863, + "grad_norm": 0.04703537375755522, + "learning_rate": 3.887005649717515e-05, + "loss": 0.003, + "step": 296 + }, + { + "clip_ratio": 0.001839210744947195, + "completion_length": 134.08929443359375, + "epoch": 0.25169491525423726, + "grad_norm": 0.13649134617830305, + "learning_rate": 3.885122410546139e-05, + "loss": -0.0194, + "num_tokens": 1660599.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 297 + }, + { + "clip_ratio": 0.009821072220802307, + "epoch": 0.25254237288135595, + "grad_norm": 0.0691252012642643, + "learning_rate": 3.883239171374765e-05, + "loss": -0.0207, + "step": 298 + }, + { + "clip_ratio": 0.027830438688397408, + "epoch": 0.2533898305084746, + "grad_norm": 0.04974246695392892, + "learning_rate": 3.88135593220339e-05, + "loss": -0.0215, + "step": 299 + }, + { + "clip_ratio": 0.05817332863807678, + "epoch": 0.2542372881355932, + "grad_norm": 0.04524622630022836, + "learning_rate": 3.879472693032015e-05, + "loss": -0.022, + "step": 300 + }, + { + "clip_ratio": 0.001304431352764368, + "completion_length": 191.7678680419922, + "epoch": 0.25508474576271184, + "grad_norm": 0.07787541743964174, + "learning_rate": 3.8775894538606404e-05, + "loss": -0.0228, + "num_tokens": 1680114.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + 
"rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 301 + }, + { + "clip_ratio": 0.004079771228134632, + "epoch": 0.2559322033898305, + "grad_norm": 0.05792631765695611, + "learning_rate": 3.875706214689266e-05, + "loss": -0.0237, + "step": 302 + }, + { + "clip_ratio": 0.01483174879103899, + "epoch": 0.25677966101694916, + "grad_norm": 0.04458591657155666, + "learning_rate": 3.8738229755178906e-05, + "loss": -0.0243, + "step": 303 + }, + { + "clip_ratio": 0.03137718886137009, + "epoch": 0.2576271186440678, + "grad_norm": 0.03880399913720864, + "learning_rate": 3.8719397363465164e-05, + "loss": -0.0248, + "step": 304 + }, + { + "clip_ratio": 0.0008109563495963812, + "completion_length": 160.80357360839844, + "epoch": 0.2584745762711864, + "grad_norm": 0.20027349236527825, + "learning_rate": 3.8700564971751415e-05, + "loss": -0.0224, + "num_tokens": 1697895.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 305 + }, + { + "clip_ratio": 0.0038879578933119774, + "epoch": 0.2593220338983051, + "grad_norm": 0.07886223922966976, + "learning_rate": 3.8681732580037666e-05, + "loss": -0.0235, + "step": 306 + }, + { + "clip_ratio": 0.014749433845281601, + "epoch": 0.26016949152542374, + "grad_norm": 0.06786394961277091, + "learning_rate": 3.8662900188323924e-05, + "loss": -0.0247, + "step": 307 + }, + { + "clip_ratio": 0.03604491055011749, + "epoch": 0.26101694915254237, + "grad_norm": 0.041254536906425165, + "learning_rate": 
3.8644067796610175e-05, + "loss": -0.0257, + "step": 308 + }, + { + "clip_ratio": 0.000806977681349963, + "completion_length": 233.62501525878906, + "epoch": 0.261864406779661, + "grad_norm": 0.08944516343119709, + "learning_rate": 3.8625235404896426e-05, + "loss": -0.0013, + "num_tokens": 1719706.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 309 + }, + { + "clip_ratio": 0.0043969168327748775, + "epoch": 0.2627118644067797, + "grad_norm": 0.08008566583785545, + "learning_rate": 3.860640301318268e-05, + "loss": -0.0023, + "step": 310 + }, + { + "clip_ratio": 0.016115259379148483, + "epoch": 0.2635593220338983, + "grad_norm": 0.06746261744912278, + "learning_rate": 3.858757062146893e-05, + "loss": -0.0044, + "step": 311 + }, + { + "clip_ratio": 0.029712393879890442, + "epoch": 0.26440677966101694, + "grad_norm": 0.05954201685230599, + "learning_rate": 3.856873822975518e-05, + "loss": -0.0066, + "step": 312 + }, + { + "clip_ratio": 0.0019188802689313889, + "completion_length": 118.87500762939453, + "epoch": 0.2652542372881356, + "grad_norm": 0.2459608147132861, + "learning_rate": 3.854990583804144e-05, + "loss": 0.0138, + "num_tokens": 1733891.0, + "reward": 0.6785714626312256, + "reward_std": 0.39675766229629517, + "rewards/check_winston_local_func/mean": 0.6785714030265808, + "rewards/check_winston_local_func/std": 0.7411819696426392, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 313 + }, + { + "clip_ratio": 0.011992106214165688, + "epoch": 0.26610169491525426, + 
"grad_norm": 0.1848848068319577, + "learning_rate": 3.853107344632769e-05, + "loss": 0.0079, + "step": 314 + }, + { + "clip_ratio": 0.05709777772426605, + "epoch": 0.2669491525423729, + "grad_norm": 0.146233205223168, + "learning_rate": 3.851224105461394e-05, + "loss": 0.0011, + "step": 315 + }, + { + "clip_ratio": 0.10807797312736511, + "epoch": 0.2677966101694915, + "grad_norm": 0.16941844805388373, + "learning_rate": 3.849340866290019e-05, + "loss": -0.0039, + "step": 316 + }, + { + "clip_ratio": 0.0004728636995423585, + "completion_length": 202.80357360839844, + "epoch": 0.26864406779661015, + "grad_norm": 0.0, + "learning_rate": 3.847457627118644e-05, + "loss": 0.0, + "num_tokens": 1753696.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 317 + }, + { + "clip_ratio": 0.0017543105641379952, + "epoch": 0.26949152542372884, + "grad_norm": 0.0, + "learning_rate": 3.84557438794727e-05, + "loss": 0.0, + "step": 318 + }, + { + "clip_ratio": 0.0028700276743620634, + "epoch": 0.27033898305084747, + "grad_norm": 0.0, + "learning_rate": 3.843691148775895e-05, + "loss": 0.0, + "step": 319 + }, + { + "clip_ratio": 0.005041220691055059, + "epoch": 0.2711864406779661, + "grad_norm": 0.0, + "learning_rate": 3.84180790960452e-05, + "loss": 0.0, + "step": 320 + }, + { + "clip_ratio": 0.0006087662768550217, + "completion_length": 246.4285888671875, + "epoch": 0.27203389830508473, + "grad_norm": 0.02696783441349338, + "learning_rate": 3.839924670433145e-05, + "loss": -0.0045, + "num_tokens": 1776128.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 321 + }, + { + "clip_ratio": 0.0007834911812096834, + "epoch": 0.27288135593220336, + "grad_norm": 0.026934160082675403, + "learning_rate": 3.8380414312617703e-05, + "loss": -0.0046, + "step": 322 + }, + { + "clip_ratio": 0.0016883478965610266, + "epoch": 0.27372881355932205, + "grad_norm": 0.026427549336641994, + "learning_rate": 3.8361581920903955e-05, + "loss": -0.0047, + "step": 323 + }, + { + "clip_ratio": 0.004345850553363562, + "epoch": 0.2745762711864407, + "grad_norm": 0.026125606288862162, + "learning_rate": 3.834274952919021e-05, + "loss": -0.0052, + "step": 324 + }, + { + "clip_ratio": 0.0007442247588187456, + "completion_length": 262.21429443359375, + "epoch": 0.2754237288135593, + "grad_norm": 0.0, + "learning_rate": 3.8323917137476463e-05, + "loss": 0.0, + "num_tokens": 1797244.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 325 + }, + { + "clip_ratio": 0.000711060652974993, + "epoch": 0.27627118644067794, + "grad_norm": 0.0, + "learning_rate": 3.8305084745762714e-05, + "loss": 0.0, + "step": 326 + }, + { + "clip_ratio": 0.0014260082971304655, + "epoch": 0.2771186440677966, + "grad_norm": 0.0, + "learning_rate": 3.828625235404897e-05, + "loss": 0.0, + "step": 327 + }, + { + "clip_ratio": 0.001994561171159148, + "epoch": 0.27796610169491526, + "grad_norm": 0.0, + "learning_rate": 3.826741996233522e-05, + "loss": 0.0, + "step": 328 + }, + { + "clip_ratio": 0.001084706513211131, + "completion_length": 172.08929443359375, + "epoch": 0.2788135593220339, + "grad_norm": 
0.0876294357565793, + "learning_rate": 3.8248587570621474e-05, + "loss": 0.0001, + "num_tokens": 1813569.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 329 + }, + { + "clip_ratio": 0.002805770607665181, + "epoch": 0.2796610169491525, + "grad_norm": 0.08561321931994853, + "learning_rate": 3.8229755178907726e-05, + "loss": -0.001, + "step": 330 + }, + { + "clip_ratio": 0.015290237963199615, + "epoch": 0.2805084745762712, + "grad_norm": 0.05204164151508037, + "learning_rate": 3.8210922787193977e-05, + "loss": -0.0022, + "step": 331 + }, + { + "clip_ratio": 0.02808833308517933, + "epoch": 0.28135593220338984, + "grad_norm": 0.047916681538162136, + "learning_rate": 3.819209039548023e-05, + "loss": -0.0028, + "step": 332 + }, + { + "clip_ratio": 0.00172739801928401, + "completion_length": 148.0178680419922, + "epoch": 0.28220338983050847, + "grad_norm": 0.0, + "learning_rate": 3.8173258003766486e-05, + "loss": 0.0, + "num_tokens": 1829578.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 333 + }, + { + "clip_ratio": 0.006786983925849199, + "epoch": 0.2830508474576271, + "grad_norm": 0.0, + "learning_rate": 3.815442561205273e-05, + "loss": 0.0, + "step": 334 + }, + { + "clip_ratio": 0.018832042813301086, + "epoch": 0.2838983050847458, + "grad_norm": 0.0, + "learning_rate": 3.813559322033899e-05, + "loss": 0.0, + "step": 335 + }, + { + 
"clip_ratio": 0.03414842113852501, + "epoch": 0.2847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.811676082862524e-05, + "loss": 0.0, + "step": 336 + }, + { + "clip_ratio": 0.0009491202072240412, + "completion_length": 218.2678680419922, + "epoch": 0.28559322033898304, + "grad_norm": 0.0, + "learning_rate": 3.809792843691149e-05, + "loss": 0.0, + "num_tokens": 1849113.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 337 + }, + { + "clip_ratio": 0.0021562932524830103, + "epoch": 0.2864406779661017, + "grad_norm": 0.0, + "learning_rate": 3.807909604519775e-05, + "loss": 0.0, + "step": 338 + }, + { + "clip_ratio": 0.0030712890438735485, + "epoch": 0.28728813559322036, + "grad_norm": 0.0, + "learning_rate": 3.8060263653484e-05, + "loss": 0.0, + "step": 339 + }, + { + "clip_ratio": 0.0054122223518788815, + "epoch": 0.288135593220339, + "grad_norm": 0.0, + "learning_rate": 3.804143126177025e-05, + "loss": 0.0, + "step": 340 + }, + { + "clip_ratio": 0.00150212156586349, + "completion_length": 244.96429443359375, + "epoch": 0.2889830508474576, + "grad_norm": 0.0, + "learning_rate": 3.80225988700565e-05, + "loss": 0.0, + "num_tokens": 1869743.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 341 + }, + { + "clip_ratio": 0.0009405449964106083, + "epoch": 0.28983050847457625, + "grad_norm": 0.0, + "learning_rate": 3.800376647834275e-05, + "loss": 0.0, + "step": 342 + }, + { + "clip_ratio": 
0.0021599442698061466, + "epoch": 0.29067796610169494, + "grad_norm": 0.0, + "learning_rate": 3.7984934086629e-05, + "loss": 0.0, + "step": 343 + }, + { + "clip_ratio": 0.002221164293587208, + "epoch": 0.29152542372881357, + "grad_norm": 0.0, + "learning_rate": 3.796610169491526e-05, + "loss": 0.0, + "step": 344 + }, + { + "clip_ratio": 0.0013717131223529577, + "completion_length": 223.3035888671875, + "epoch": 0.2923728813559322, + "grad_norm": 0.12633963981780688, + "learning_rate": 3.7947269303201505e-05, + "loss": 0.0043, + "num_tokens": 1889048.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 345 + }, + { + "clip_ratio": 0.004998536314815283, + "epoch": 0.29322033898305083, + "grad_norm": 0.09918328474458112, + "learning_rate": 3.792843691148776e-05, + "loss": 0.0014, + "step": 346 + }, + { + "clip_ratio": 0.013239889405667782, + "epoch": 0.2940677966101695, + "grad_norm": 0.07460521909248337, + "learning_rate": 3.7909604519774014e-05, + "loss": -0.0005, + "step": 347 + }, + { + "clip_ratio": 0.033433884382247925, + "epoch": 0.29491525423728815, + "grad_norm": 0.06058703312441606, + "learning_rate": 3.7890772128060265e-05, + "loss": -0.002, + "step": 348 + }, + { + "clip_ratio": 0.003388527315109968, + "completion_length": 246.87501525878906, + "epoch": 0.2957627118644068, + "grad_norm": 0.0, + "learning_rate": 3.787193973634652e-05, + "loss": 0.0, + "num_tokens": 1909737.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 349 + }, + { + "clip_ratio": 0.003924295771867037, + "epoch": 0.2966101694915254, + "grad_norm": 0.0, + "learning_rate": 3.7853107344632774e-05, + "loss": 0.0, + "step": 350 + }, + { + "clip_ratio": 0.004607200622558594, + "epoch": 0.29745762711864404, + "grad_norm": 0.0, + "learning_rate": 3.7834274952919025e-05, + "loss": 0.0, + "step": 351 + }, + { + "clip_ratio": 0.007875598035752773, + "epoch": 0.2983050847457627, + "grad_norm": 0.0, + "learning_rate": 3.7815442561205276e-05, + "loss": 0.0, + "step": 352 + }, + { + "clip_ratio": 0.0009100620518438518, + "completion_length": 221.5178680419922, + "epoch": 0.29915254237288136, + "grad_norm": 0.0, + "learning_rate": 3.779661016949153e-05, + "loss": 0.0, + "num_tokens": 1930142.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 353 + }, + { + "clip_ratio": 0.002130310283973813, + "epoch": 0.3, + "grad_norm": 0.0, + "learning_rate": 3.777777777777778e-05, + "loss": 0.0, + "step": 354 + }, + { + "clip_ratio": 0.005219562910497189, + "epoch": 0.3008474576271186, + "grad_norm": 0.0, + "learning_rate": 3.7758945386064036e-05, + "loss": 0.0, + "step": 355 + }, + { + "clip_ratio": 0.007768368814140558, + "epoch": 0.3016949152542373, + "grad_norm": 0.0, + "learning_rate": 3.774011299435029e-05, + "loss": 0.0, + "step": 356 + }, + { + "clip_ratio": 0.002148033818230033, + "completion_length": 170.1607208251953, + "epoch": 0.30254237288135594, + "grad_norm": 0.15185128372630483, + "learning_rate": 3.772128060263654e-05, + "loss": -0.0055, + "num_tokens": 1947871.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + 
"rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 357 + }, + { + "clip_ratio": 0.0094491271302104, + "epoch": 0.30338983050847457, + "grad_norm": 0.08593602543887907, + "learning_rate": 3.770244821092279e-05, + "loss": -0.0077, + "step": 358 + }, + { + "clip_ratio": 0.06222861260175705, + "epoch": 0.3042372881355932, + "grad_norm": 0.06218840086209775, + "learning_rate": 3.768361581920904e-05, + "loss": -0.0087, + "step": 359 + }, + { + "clip_ratio": 0.10356654226779938, + "epoch": 0.3050847457627119, + "grad_norm": 0.07005653665235588, + "learning_rate": 3.766478342749529e-05, + "loss": -0.0091, + "step": 360 + }, + { + "clip_ratio": 0.0011975433444604278, + "completion_length": 213.58929443359375, + "epoch": 0.3059322033898305, + "grad_norm": 0.0, + "learning_rate": 3.764595103578155e-05, + "loss": 0.0, + "num_tokens": 1967888.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 361 + }, + { + "clip_ratio": 0.0017720028990879655, + "epoch": 0.30677966101694915, + "grad_norm": 0.0, + "learning_rate": 3.76271186440678e-05, + "loss": 0.0, + "step": 362 + }, + { + "clip_ratio": 0.002977039897814393, + "epoch": 0.3076271186440678, + "grad_norm": 0.0, + "learning_rate": 3.760828625235405e-05, + "loss": 0.0, + "step": 363 + }, + { + "clip_ratio": 0.0051023694686591625, + "epoch": 0.30847457627118646, + "grad_norm": 0.0, + "learning_rate": 3.758945386064031e-05, + "loss": 0.0, + "step": 364 + }, + { + "clip_ratio": 
0.00277900043874979, + "completion_length": 175.7857208251953, + "epoch": 0.3093220338983051, + "grad_norm": 0.0, + "learning_rate": 3.7570621468926554e-05, + "loss": 0.0, + "num_tokens": 1985452.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 365 + }, + { + "clip_ratio": 0.0037012884858995676, + "epoch": 0.3101694915254237, + "grad_norm": 0.0, + "learning_rate": 3.755178907721281e-05, + "loss": 0.0, + "step": 366 + }, + { + "clip_ratio": 0.003547186963260174, + "epoch": 0.31101694915254235, + "grad_norm": 0.0, + "learning_rate": 3.753295668549906e-05, + "loss": 0.0, + "step": 367 + }, + { + "clip_ratio": 0.005322239827364683, + "epoch": 0.31186440677966104, + "grad_norm": 0.0, + "learning_rate": 3.7514124293785313e-05, + "loss": 0.0, + "step": 368 + }, + { + "clip_ratio": 0.004034657031297684, + "completion_length": 110.10714721679688, + "epoch": 0.31271186440677967, + "grad_norm": 0.12635496286827033, + "learning_rate": 3.7495291902071565e-05, + "loss": -0.0029, + "num_tokens": 1998890.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 369 + }, + { + "clip_ratio": 0.009292816743254662, + "epoch": 0.3135593220338983, + "grad_norm": 0.09312227257169194, + "learning_rate": 3.7476459510357816e-05, + "loss": -0.0051, + "step": 370 + }, + { + "clip_ratio": 0.03155684098601341, + "epoch": 0.31440677966101693, + "grad_norm": 0.05768111194452057, + 
"learning_rate": 3.745762711864407e-05, + "loss": -0.0068, + "step": 371 + }, + { + "clip_ratio": 0.0636262521147728, + "epoch": 0.3152542372881356, + "grad_norm": 0.047107030238279814, + "learning_rate": 3.7438794726930325e-05, + "loss": -0.0077, + "step": 372 + }, + { + "clip_ratio": 0.001232151291333139, + "completion_length": 234.87501525878906, + "epoch": 0.31610169491525425, + "grad_norm": 0.0, + "learning_rate": 3.7419962335216576e-05, + "loss": 0.0, + "num_tokens": 2018547.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 373 + }, + { + "clip_ratio": 0.000776052416767925, + "epoch": 0.3169491525423729, + "grad_norm": 0.0, + "learning_rate": 3.740112994350283e-05, + "loss": 0.0, + "step": 374 + }, + { + "clip_ratio": 0.0008471307810395956, + "epoch": 0.3177966101694915, + "grad_norm": 0.0, + "learning_rate": 3.7382297551789085e-05, + "loss": 0.0, + "step": 375 + }, + { + "clip_ratio": 0.0025251915212720633, + "epoch": 0.31864406779661014, + "grad_norm": 0.0, + "learning_rate": 3.736346516007533e-05, + "loss": 0.0, + "step": 376 + }, + { + "clip_ratio": 0.0007399375317618251, + "completion_length": 256.4821472167969, + "epoch": 0.31949152542372883, + "grad_norm": 0.0, + "learning_rate": 3.734463276836159e-05, + "loss": 0.0, + "num_tokens": 2041454.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 377 + }, + { + "clip_ratio": 0.0008243753691203892, + "epoch": 0.32033898305084746, + "grad_norm": 0.0, 
+ "learning_rate": 3.732580037664784e-05, + "loss": 0.0, + "step": 378 + }, + { + "clip_ratio": 0.0007023674552328885, + "epoch": 0.3211864406779661, + "grad_norm": 0.0, + "learning_rate": 3.730696798493409e-05, + "loss": 0.0, + "step": 379 + }, + { + "clip_ratio": 0.0021256571635603905, + "epoch": 0.3220338983050847, + "grad_norm": 0.0, + "learning_rate": 3.728813559322034e-05, + "loss": 0.0, + "step": 380 + }, + { + "clip_ratio": 0.00031836971174925566, + "completion_length": 330.5357360839844, + "epoch": 0.3228813559322034, + "grad_norm": 0.0, + "learning_rate": 3.72693032015066e-05, + "loss": 0.0, + "num_tokens": 2067788.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 381 + }, + { + "clip_ratio": 0.0005261868936941028, + "epoch": 0.32372881355932204, + "grad_norm": 0.0, + "learning_rate": 3.725047080979284e-05, + "loss": 0.0, + "step": 382 + }, + { + "clip_ratio": 0.00022253258794080466, + "epoch": 0.32457627118644067, + "grad_norm": 0.0, + "learning_rate": 3.72316384180791e-05, + "loss": 0.0, + "step": 383 + }, + { + "clip_ratio": 0.0006931009120307863, + "epoch": 0.3254237288135593, + "grad_norm": 0.0, + "learning_rate": 3.721280602636535e-05, + "loss": 0.0, + "step": 384 + }, + { + "clip_ratio": 0.003750877920538187, + "completion_length": 131.32144165039062, + "epoch": 0.326271186440678, + "grad_norm": 0.13631999098251835, + "learning_rate": 3.71939736346516e-05, + "loss": -0.0398, + "num_tokens": 2082318.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + 
"rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 385 + }, + { + "clip_ratio": 0.008503442630171776, + "epoch": 0.3271186440677966, + "grad_norm": 0.10569445062232001, + "learning_rate": 3.717514124293786e-05, + "loss": -0.0422, + "step": 386 + }, + { + "clip_ratio": 0.023656172677874565, + "epoch": 0.32796610169491525, + "grad_norm": 0.08440756273471868, + "learning_rate": 3.715630885122411e-05, + "loss": -0.0449, + "step": 387 + }, + { + "clip_ratio": 0.05569107085466385, + "epoch": 0.3288135593220339, + "grad_norm": 0.0732865060873415, + "learning_rate": 3.713747645951036e-05, + "loss": -0.0475, + "step": 388 + }, + { + "clip_ratio": 0.0007906121318228543, + "completion_length": 286.9821472167969, + "epoch": 0.32966101694915256, + "grad_norm": 0.12082254351176634, + "learning_rate": 3.711864406779661e-05, + "loss": 0.0063, + "num_tokens": 2106125.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 389 + }, + { + "clip_ratio": 0.0015471117803826928, + "epoch": 0.3305084745762712, + "grad_norm": 0.11784337456998906, + "learning_rate": 3.7099811676082864e-05, + "loss": 0.0037, + "step": 390 + }, + { + "clip_ratio": 0.012488815933465958, + "epoch": 0.3313559322033898, + "grad_norm": 0.0941445528587045, + "learning_rate": 3.7080979284369115e-05, + "loss": 0.0003, + "step": 391 + }, + { + "clip_ratio": 0.028695791959762573, + "epoch": 0.33220338983050846, + "grad_norm": 0.08292380918920757, + "learning_rate": 3.706214689265537e-05, + "loss": -0.0027, + "step": 392 + }, + { + "clip_ratio": 0.0003602007054723799, + 
"completion_length": 239.83929443359375, + "epoch": 0.33305084745762714, + "grad_norm": 0.03143069205221857, + "learning_rate": 3.704331450094162e-05, + "loss": -0.0148, + "num_tokens": 2126076.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 393 + }, + { + "clip_ratio": 0.0013097112532705069, + "epoch": 0.3338983050847458, + "grad_norm": 0.03129928506196762, + "learning_rate": 3.7024482109227875e-05, + "loss": -0.0151, + "step": 394 + }, + { + "clip_ratio": 0.0041871643625199795, + "epoch": 0.3347457627118644, + "grad_norm": 0.030726291213562903, + "learning_rate": 3.7005649717514126e-05, + "loss": -0.0154, + "step": 395 + }, + { + "clip_ratio": 0.009951738640666008, + "epoch": 0.33559322033898303, + "grad_norm": 0.028938580269396656, + "learning_rate": 3.698681732580038e-05, + "loss": -0.0158, + "step": 396 + }, + { + "clip_ratio": 0.0010198758682236075, + "completion_length": 190.6607208251953, + "epoch": 0.3364406779661017, + "grad_norm": 0.0, + "learning_rate": 3.6967984934086635e-05, + "loss": 0.0, + "num_tokens": 2143449.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 397 + }, + { + "clip_ratio": 0.0006895315600559115, + "epoch": 0.33728813559322035, + "grad_norm": 0.0, + "learning_rate": 3.6949152542372886e-05, + "loss": 0.0, + "step": 398 + }, + { + "clip_ratio": 0.0038485280238091946, + "epoch": 0.338135593220339, + "grad_norm": 
0.0, + "learning_rate": 3.693032015065914e-05, + "loss": 0.0, + "step": 399 + }, + { + "clip_ratio": 0.00733610987663269, + "epoch": 0.3389830508474576, + "grad_norm": 0.0, + "learning_rate": 3.691148775894539e-05, + "loss": 0.0, + "step": 400 + }, + { + "clip_ratio": 0.00022421723406296223, + "completion_length": 276.83929443359375, + "epoch": 0.3398305084745763, + "grad_norm": 0.0, + "learning_rate": 3.689265536723164e-05, + "loss": 0.0, + "num_tokens": 2166472.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 401 + }, + { + "clip_ratio": 0.00020110365585424006, + "epoch": 0.34067796610169493, + "grad_norm": 0.0, + "learning_rate": 3.687382297551789e-05, + "loss": 0.0, + "step": 402 + }, + { + "clip_ratio": 0.00024886298342607915, + "epoch": 0.34152542372881356, + "grad_norm": 0.0, + "learning_rate": 3.685499058380415e-05, + "loss": 0.0, + "step": 403 + }, + { + "clip_ratio": 0.0005991093348711729, + "epoch": 0.3423728813559322, + "grad_norm": 0.0, + "learning_rate": 3.68361581920904e-05, + "loss": 0.0, + "step": 404 + }, + { + "clip_ratio": 0.0006365530425682664, + "completion_length": 191.17857360839844, + "epoch": 0.3432203389830508, + "grad_norm": 0.0, + "learning_rate": 3.681732580037665e-05, + "loss": 0.0, + "num_tokens": 2184146.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 405 + }, + { + "clip_ratio": 0.00040138812619261444, + "epoch": 0.3440677966101695, + "grad_norm": 0.0, + 
"learning_rate": 3.679849340866291e-05, + "loss": 0.0, + "step": 406 + }, + { + "clip_ratio": 0.0013467035023495555, + "epoch": 0.34491525423728814, + "grad_norm": 0.0, + "learning_rate": 3.677966101694915e-05, + "loss": 0.0, + "step": 407 + }, + { + "clip_ratio": 0.0009324097190983593, + "epoch": 0.34576271186440677, + "grad_norm": 0.0, + "learning_rate": 3.676082862523541e-05, + "loss": 0.0, + "step": 408 + }, + { + "clip_ratio": 0.0026480748783797026, + "completion_length": 210.1428680419922, + "epoch": 0.3466101694915254, + "grad_norm": 0.0, + "learning_rate": 3.674199623352166e-05, + "loss": 0.0, + "num_tokens": 2203578.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 409 + }, + { + "clip_ratio": 0.001794831594452262, + "epoch": 0.3474576271186441, + "grad_norm": 0.0, + "learning_rate": 3.672316384180791e-05, + "loss": 0.0, + "step": 410 + }, + { + "clip_ratio": 0.0019218155648559332, + "epoch": 0.3483050847457627, + "grad_norm": 0.0, + "learning_rate": 3.6704331450094164e-05, + "loss": 0.0, + "step": 411 + }, + { + "clip_ratio": 0.002752742264419794, + "epoch": 0.34915254237288135, + "grad_norm": 0.0, + "learning_rate": 3.668549905838042e-05, + "loss": 0.0, + "step": 412 + }, + { + "clip_ratio": 0.001983263995498419, + "completion_length": 254.7857208251953, + "epoch": 0.35, + "grad_norm": 0.0, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.0, + "num_tokens": 2225574.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + 
"rewards/strict_format_reward_func/std": 0.0, + "step": 413 + }, + { + "clip_ratio": 0.0027607560623437166, + "epoch": 0.35084745762711866, + "grad_norm": 0.0, + "learning_rate": 3.6647834274952924e-05, + "loss": 0.0, + "step": 414 + }, + { + "clip_ratio": 0.002160451840609312, + "epoch": 0.3516949152542373, + "grad_norm": 0.0, + "learning_rate": 3.6629001883239175e-05, + "loss": 0.0, + "step": 415 + }, + { + "clip_ratio": 0.002045322209596634, + "epoch": 0.3525423728813559, + "grad_norm": 0.0, + "learning_rate": 3.6610169491525426e-05, + "loss": 0.0, + "step": 416 + }, + { + "clip_ratio": 0.001061619957908988, + "completion_length": 241.58929443359375, + "epoch": 0.35338983050847456, + "grad_norm": 0.0, + "learning_rate": 3.6591337099811684e-05, + "loss": 0.0, + "num_tokens": 2245623.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 417 + }, + { + "clip_ratio": 0.0012349070748314261, + "epoch": 0.35423728813559324, + "grad_norm": 0.0, + "learning_rate": 3.657250470809793e-05, + "loss": 0.0, + "step": 418 + }, + { + "clip_ratio": 0.0013292384101077914, + "epoch": 0.3550847457627119, + "grad_norm": 0.0, + "learning_rate": 3.6553672316384186e-05, + "loss": 0.0, + "step": 419 + }, + { + "clip_ratio": 0.001080949092283845, + "epoch": 0.3559322033898305, + "grad_norm": 0.0, + "learning_rate": 3.653483992467044e-05, + "loss": 0.0, + "step": 420 + }, + { + "clip_ratio": 0.0021271593868732452, + "completion_length": 207.94644165039062, + "epoch": 0.35677966101694913, + "grad_norm": 0.0, + "learning_rate": 3.651600753295669e-05, + "loss": 0.0, + "num_tokens": 2264492.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + 
"rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 421 + }, + { + "clip_ratio": 0.0023707763757556677, + "epoch": 0.3576271186440678, + "grad_norm": 0.0, + "learning_rate": 3.649717514124294e-05, + "loss": 0.0, + "step": 422 + }, + { + "clip_ratio": 0.001882536569610238, + "epoch": 0.35847457627118645, + "grad_norm": 0.0, + "learning_rate": 3.64783427495292e-05, + "loss": 0.0, + "step": 423 + }, + { + "clip_ratio": 0.002321977633982897, + "epoch": 0.3593220338983051, + "grad_norm": 0.0, + "learning_rate": 3.645951035781544e-05, + "loss": 0.0, + "step": 424 + }, + { + "clip_ratio": 0.0014300509355962276, + "completion_length": 204.3928680419922, + "epoch": 0.3601694915254237, + "grad_norm": 0.0, + "learning_rate": 3.64406779661017e-05, + "loss": 0.0, + "num_tokens": 2283362.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 425 + }, + { + "clip_ratio": 0.001956741791218519, + "epoch": 0.3610169491525424, + "grad_norm": 0.0, + "learning_rate": 3.642184557438795e-05, + "loss": 0.0, + "step": 426 + }, + { + "clip_ratio": 0.0016578995855525136, + "epoch": 0.36186440677966103, + "grad_norm": 0.0, + "learning_rate": 3.64030131826742e-05, + "loss": 0.0, + "step": 427 + }, + { + "clip_ratio": 0.0019775168038904667, + "epoch": 0.36271186440677966, + "grad_norm": 0.0, + "learning_rate": 3.638418079096045e-05, + "loss": 0.0, + "step": 428 + }, + { + "clip_ratio": 0.001298850984312594, + "completion_length": 171.12501525878906, + "epoch": 0.3635593220338983, + "grad_norm": 0.0, + "learning_rate": 
3.636534839924671e-05, + "loss": 0.0, + "num_tokens": 2300497.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 429 + }, + { + "clip_ratio": 0.001368111465126276, + "epoch": 0.3644067796610169, + "grad_norm": 0.0, + "learning_rate": 3.634651600753296e-05, + "loss": 0.0, + "step": 430 + }, + { + "clip_ratio": 0.0009136229637078941, + "epoch": 0.3652542372881356, + "grad_norm": 0.0, + "learning_rate": 3.632768361581921e-05, + "loss": 0.0, + "step": 431 + }, + { + "clip_ratio": 0.0010722121223807335, + "epoch": 0.36610169491525424, + "grad_norm": 0.0, + "learning_rate": 3.630885122410546e-05, + "loss": 0.0, + "step": 432 + }, + { + "clip_ratio": 0.0001718213752610609, + "completion_length": 236.62501525878906, + "epoch": 0.36694915254237287, + "grad_norm": 0.0, + "learning_rate": 3.6290018832391714e-05, + "loss": 0.0, + "num_tokens": 2322044.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 433 + }, + { + "clip_ratio": 0.0003989361284766346, + "epoch": 0.3677966101694915, + "grad_norm": 0.0, + "learning_rate": 3.627118644067797e-05, + "loss": 0.0, + "step": 434 + }, + { + "clip_ratio": 0.00034700697869993746, + "epoch": 0.3686440677966102, + "grad_norm": 0.0, + "learning_rate": 3.625235404896422e-05, + "loss": 0.0, + "step": 435 + }, + { + "clip_ratio": 0.0001743702741805464, + "epoch": 0.3694915254237288, + "grad_norm": 0.0, + "learning_rate": 3.6233521657250474e-05, + "loss": 0.0, + "step": 436 + }, + { + 
"clip_ratio": 0.002670450834557414, + "completion_length": 238.0357208251953, + "epoch": 0.37033898305084745, + "grad_norm": 0.0, + "learning_rate": 3.6214689265536725e-05, + "loss": 0.0, + "num_tokens": 2342662.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 437 + }, + { + "clip_ratio": 0.002840070752426982, + "epoch": 0.3711864406779661, + "grad_norm": 0.0, + "learning_rate": 3.6195856873822976e-05, + "loss": 0.0, + "step": 438 + }, + { + "clip_ratio": 0.0031569055281579494, + "epoch": 0.37203389830508476, + "grad_norm": 0.0, + "learning_rate": 3.617702448210923e-05, + "loss": 0.0, + "step": 439 + }, + { + "clip_ratio": 0.002347626956179738, + "epoch": 0.3728813559322034, + "grad_norm": 0.0, + "learning_rate": 3.6158192090395485e-05, + "loss": 0.0, + "step": 440 + }, + { + "clip_ratio": 0.001670468831434846, + "completion_length": 260.2857360839844, + "epoch": 0.373728813559322, + "grad_norm": 0.0, + "learning_rate": 3.6139359698681736e-05, + "loss": 0.0, + "num_tokens": 2364478.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 441 + }, + { + "clip_ratio": 0.0009430252248421311, + "epoch": 0.37457627118644066, + "grad_norm": 0.0, + "learning_rate": 3.612052730696799e-05, + "loss": 0.0, + "step": 442 + }, + { + "clip_ratio": 0.001254075556062162, + "epoch": 0.37542372881355934, + "grad_norm": 0.0, + "learning_rate": 3.610169491525424e-05, + "loss": 0.0, + "step": 443 + }, + { + "clip_ratio": 
0.0011122890282422304, + "epoch": 0.376271186440678, + "grad_norm": 0.0, + "learning_rate": 3.608286252354049e-05, + "loss": 0.0, + "step": 444 + }, + { + "clip_ratio": 0.0003623559314291924, + "completion_length": 211.82144165039062, + "epoch": 0.3771186440677966, + "grad_norm": 0.0, + "learning_rate": 3.606403013182675e-05, + "loss": 0.0, + "num_tokens": 2383820.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 445 + }, + { + "clip_ratio": 0.00031094998121261597, + "epoch": 0.37796610169491524, + "grad_norm": 0.0, + "learning_rate": 3.6045197740113e-05, + "loss": 0.0, + "step": 446 + }, + { + "clip_ratio": 0.0007522654486820102, + "epoch": 0.3788135593220339, + "grad_norm": 0.0, + "learning_rate": 3.602636534839925e-05, + "loss": 0.0, + "step": 447 + }, + { + "clip_ratio": 0.00031094998121261597, + "epoch": 0.37966101694915255, + "grad_norm": 0.0, + "learning_rate": 3.60075329566855e-05, + "loss": 0.0, + "step": 448 + }, + { + "clip_ratio": 0.00025147091946564615, + "completion_length": 198.21429443359375, + "epoch": 0.3805084745762712, + "grad_norm": 0.0, + "learning_rate": 3.598870056497175e-05, + "loss": 0.0, + "num_tokens": 2402592.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 449 + }, + { + "clip_ratio": 0.0006872185622341931, + "epoch": 0.3813559322033898, + "grad_norm": 0.0, + "learning_rate": 3.5969868173258e-05, + "loss": 0.0, + "step": 450 + }, + { + "clip_ratio": 0.0006297206855379045, + 
"epoch": 0.3822033898305085, + "grad_norm": 0.0, + "learning_rate": 3.595103578154426e-05, + "loss": 0.0, + "step": 451 + }, + { + "clip_ratio": 0.00026596483075991273, + "epoch": 0.38305084745762713, + "grad_norm": 0.0, + "learning_rate": 3.593220338983051e-05, + "loss": 0.0, + "step": 452 + }, + { + "clip_ratio": 0.001058344729244709, + "completion_length": 238.69644165039062, + "epoch": 0.38389830508474576, + "grad_norm": 0.0, + "learning_rate": 3.591337099811676e-05, + "loss": 0.0, + "num_tokens": 2423007.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 453 + }, + { + "clip_ratio": 0.0014913092600181699, + "epoch": 0.3847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.589453860640302e-05, + "loss": 0.0, + "step": 454 + }, + { + "clip_ratio": 0.001653712592087686, + "epoch": 0.3855932203389831, + "grad_norm": 0.0, + "learning_rate": 3.5875706214689265e-05, + "loss": 0.0, + "step": 455 + }, + { + "clip_ratio": 0.0012562735937535763, + "epoch": 0.3864406779661017, + "grad_norm": 0.0, + "learning_rate": 3.585687382297552e-05, + "loss": 0.0, + "step": 456 + }, + { + "clip_ratio": 0.0019226728472858667, + "completion_length": 215.57144165039062, + "epoch": 0.38728813559322034, + "grad_norm": 0.0, + "learning_rate": 3.5838041431261774e-05, + "loss": 0.0, + "num_tokens": 2442303.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 457 + }, + { + "clip_ratio": 0.0007327733910642564, + "epoch": 
0.38813559322033897, + "grad_norm": 0.0, + "learning_rate": 3.5819209039548025e-05, + "loss": 0.0, + "step": 458 + }, + { + "clip_ratio": 0.0009872624650597572, + "epoch": 0.3889830508474576, + "grad_norm": 0.0, + "learning_rate": 3.5800376647834276e-05, + "loss": 0.0, + "step": 459 + }, + { + "clip_ratio": 0.0015375473303720355, + "epoch": 0.3898305084745763, + "grad_norm": 0.0, + "learning_rate": 3.5781544256120534e-05, + "loss": 0.0, + "step": 460 + }, + { + "clip_ratio": 0.0025008555967360735, + "completion_length": 174.6607208251953, + "epoch": 0.3906779661016949, + "grad_norm": 0.0, + "learning_rate": 3.576271186440678e-05, + "loss": 0.0, + "num_tokens": 2458980.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 461 + }, + { + "clip_ratio": 0.002855929546058178, + "epoch": 0.39152542372881355, + "grad_norm": 0.0, + "learning_rate": 3.5743879472693036e-05, + "loss": 0.0, + "step": 462 + }, + { + "clip_ratio": 0.0031251059845089912, + "epoch": 0.3923728813559322, + "grad_norm": 0.0, + "learning_rate": 3.572504708097929e-05, + "loss": 0.0, + "step": 463 + }, + { + "clip_ratio": 0.002123030601069331, + "epoch": 0.39322033898305087, + "grad_norm": 0.0, + "learning_rate": 3.570621468926554e-05, + "loss": 0.0, + "step": 464 + }, + { + "clip_ratio": 0.00038411590503528714, + "completion_length": 251.46429443359375, + "epoch": 0.3940677966101695, + "grad_norm": 0.0, + "learning_rate": 3.5687382297551796e-05, + "loss": 0.0, + "num_tokens": 2480318.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 465 + }, + { + "clip_ratio": 0.0006886526243761182, + "epoch": 0.3949152542372881, + "grad_norm": 0.0, + "learning_rate": 3.566854990583805e-05, + "loss": 0.0, + "step": 466 + }, + { + "clip_ratio": 0.0007815518183633685, + "epoch": 0.39576271186440676, + "grad_norm": 0.0, + "learning_rate": 3.56497175141243e-05, + "loss": 0.0, + "step": 467 + }, + { + "clip_ratio": 0.0003181493084412068, + "epoch": 0.39661016949152544, + "grad_norm": 0.0, + "learning_rate": 3.563088512241055e-05, + "loss": 0.0, + "step": 468 + }, + { + "clip_ratio": 0.0013011619448661804, + "completion_length": 229.0178680419922, + "epoch": 0.3974576271186441, + "grad_norm": 0.0, + "learning_rate": 3.56120527306968e-05, + "loss": 0.0, + "num_tokens": 2500487.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 469 + }, + { + "clip_ratio": 0.0009047752828337252, + "epoch": 0.3983050847457627, + "grad_norm": 0.0, + "learning_rate": 3.559322033898305e-05, + "loss": 0.0, + "step": 470 + }, + { + "clip_ratio": 0.0014872046886011958, + "epoch": 0.39915254237288134, + "grad_norm": 0.0, + "learning_rate": 3.557438794726931e-05, + "loss": 0.0, + "step": 471 + }, + { + "clip_ratio": 0.0017439923249185085, + "epoch": 0.4, + "grad_norm": 0.0, + "learning_rate": 3.555555555555555e-05, + "loss": 0.0, + "step": 472 + }, + { + "clip_ratio": 0.0008359851781278849, + "completion_length": 224.00001525878906, + "epoch": 0.40084745762711865, + "grad_norm": 0.0, + "learning_rate": 3.553672316384181e-05, + "loss": 0.0, + "num_tokens": 2520007.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + 
"rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 473 + }, + { + "clip_ratio": 0.0003327638260088861, + "epoch": 0.4016949152542373, + "grad_norm": 0.0, + "learning_rate": 3.551789077212806e-05, + "loss": 0.0, + "step": 474 + }, + { + "clip_ratio": 0.0008026210125535727, + "epoch": 0.4025423728813559, + "grad_norm": 0.0, + "learning_rate": 3.549905838041431e-05, + "loss": 0.0, + "step": 475 + }, + { + "clip_ratio": 0.000616455334238708, + "epoch": 0.4033898305084746, + "grad_norm": 0.0, + "learning_rate": 3.548022598870057e-05, + "loss": 0.0, + "step": 476 + }, + { + "clip_ratio": 0.0010224612196907401, + "completion_length": 212.6607208251953, + "epoch": 0.40423728813559323, + "grad_norm": 0.0, + "learning_rate": 3.546139359698682e-05, + "loss": 0.0, + "num_tokens": 2539756.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 477 + }, + { + "clip_ratio": 0.001049195067025721, + "epoch": 0.40508474576271186, + "grad_norm": 0.0, + "learning_rate": 3.544256120527307e-05, + "loss": 0.0, + "step": 478 + }, + { + "clip_ratio": 0.0010224612196907401, + "epoch": 0.4059322033898305, + "grad_norm": 0.0, + "learning_rate": 3.5423728813559324e-05, + "loss": 0.0, + "step": 479 + }, + { + "clip_ratio": 0.0011091463966295123, + "epoch": 0.4067796610169492, + "grad_norm": 0.0, + "learning_rate": 3.5404896421845575e-05, + "loss": 0.0, + "step": 480 + }, + { + "clip_ratio": 0.0008076262311078608, + "completion_length": 188.33929443359375, + "epoch": 0.4076271186440678, + "grad_norm": 0.0, + "learning_rate": 
3.5386064030131826e-05, + "loss": 0.0, + "num_tokens": 2557479.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 481 + }, + { + "clip_ratio": 0.0010148414876312017, + "epoch": 0.40847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.5367231638418084e-05, + "loss": 0.0, + "step": 482 + }, + { + "clip_ratio": 0.00107091898098588, + "epoch": 0.40932203389830507, + "grad_norm": 0.0, + "learning_rate": 3.5348399246704335e-05, + "loss": 0.0, + "step": 483 + }, + { + "clip_ratio": 0.001206585788168013, + "epoch": 0.4101694915254237, + "grad_norm": 0.0, + "learning_rate": 3.5329566854990586e-05, + "loss": 0.0, + "step": 484 + }, + { + "clip_ratio": 0.0006591131095774472, + "completion_length": 202.25001525878906, + "epoch": 0.4110169491525424, + "grad_norm": 0.0, + "learning_rate": 3.5310734463276844e-05, + "loss": 0.0, + "num_tokens": 2575709.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 485 + }, + { + "clip_ratio": 0.0011115194065496325, + "epoch": 0.411864406779661, + "grad_norm": 0.0, + "learning_rate": 3.529190207156309e-05, + "loss": 0.0, + "step": 486 + }, + { + "clip_ratio": 0.0010618160013109446, + "epoch": 0.41271186440677965, + "grad_norm": 0.0, + "learning_rate": 3.5273069679849346e-05, + "loss": 0.0, + "step": 487 + }, + { + "clip_ratio": 0.001367724034935236, + "epoch": 0.4135593220338983, + "grad_norm": 0.0, + "learning_rate": 3.52542372881356e-05, + "loss": 0.0, + "step": 488 + }, + { + 
"clip_ratio": 0.00175467727240175, + "completion_length": 132.1428680419922, + "epoch": 0.41440677966101697, + "grad_norm": 0.0, + "learning_rate": 3.523540489642185e-05, + "loss": 0.0, + "num_tokens": 2590205.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 489 + }, + { + "clip_ratio": 0.0024672539439052343, + "epoch": 0.4152542372881356, + "grad_norm": 0.0, + "learning_rate": 3.52165725047081e-05, + "loss": 0.0, + "step": 490 + }, + { + "clip_ratio": 0.001686076750047505, + "epoch": 0.4161016949152542, + "grad_norm": 0.0, + "learning_rate": 3.519774011299436e-05, + "loss": 0.0, + "step": 491 + }, + { + "clip_ratio": 0.001242591068148613, + "epoch": 0.41694915254237286, + "grad_norm": 0.0, + "learning_rate": 3.51789077212806e-05, + "loss": 0.0, + "step": 492 + }, + { + "clip_ratio": 0.00047669216291978955, + "completion_length": 206.7857208251953, + "epoch": 0.41779661016949154, + "grad_norm": 0.0, + "learning_rate": 3.516007532956686e-05, + "loss": 0.0, + "num_tokens": 2608545.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 493 + }, + { + "clip_ratio": 0.0006388615584000945, + "epoch": 0.4186440677966102, + "grad_norm": 0.0, + "learning_rate": 3.514124293785311e-05, + "loss": 0.0, + "step": 494 + }, + { + "clip_ratio": 0.0007421272457577288, + "epoch": 0.4194915254237288, + "grad_norm": 0.0, + "learning_rate": 3.512241054613936e-05, + "loss": 0.0, + "step": 495 + }, + { + "clip_ratio": 
0.0011416444322094321, + "epoch": 0.42033898305084744, + "grad_norm": 0.0, + "learning_rate": 3.510357815442562e-05, + "loss": 0.0, + "step": 496 + }, + { + "clip_ratio": 0.0008945852750912309, + "completion_length": 218.6428680419922, + "epoch": 0.4211864406779661, + "grad_norm": 0.0, + "learning_rate": 3.5084745762711864e-05, + "loss": 0.0, + "num_tokens": 2628469.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 497 + }, + { + "clip_ratio": 0.0007813043775968254, + "epoch": 0.42203389830508475, + "grad_norm": 0.0, + "learning_rate": 3.506591337099812e-05, + "loss": 0.0, + "step": 498 + }, + { + "clip_ratio": 0.0005737429019063711, + "epoch": 0.4228813559322034, + "grad_norm": 0.0, + "learning_rate": 3.504708097928437e-05, + "loss": 0.0, + "step": 499 + }, + { + "clip_ratio": 0.0009029792272485793, + "epoch": 0.423728813559322, + "grad_norm": 0.0, + "learning_rate": 3.5028248587570624e-05, + "loss": 0.0, + "step": 500 + } + ], + "logging_steps": 1, + "max_steps": 2360, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/.ipynb_checkpoints/zero_to_fp32-checkpoint.py b/checkpoint-500/.ipynb_checkpoints/zero_to_fp32-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/checkpoint-500/.ipynb_checkpoints/zero_to_fp32-checkpoint.py 
@@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + 
+ # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for 
k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a0f59b7b91d61179514d6e990a01e588f1af99b0 --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.0 \ No newline at end of file diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..29e0b3a4b795e316b1a7ba9b7dc790302a9d6e0f --- /dev/null +++ b/checkpoint-500/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "gate_proj", + "o_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": 
null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bc0fe3f68ce547354fd632eeda4ef92c44a07dd --- /dev/null +++ b/checkpoint-500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb884e04dc139c6c2c97f62d9b58680c586a044bd007fed0d62896899f00a719 +size 167832688 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e0f5b3ba1a53f6b12e9fa911ade5917e80edbe6 --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b3215b6f56183efb3bb9d6df2ddfdbf03d4d97a16eee9d86910ee030beb552 +size 72284496 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..31c79f578692d5343dc2a8c4ee5924c9a097734f --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da6593f79bee7ea376fdfe1fddb92c578f7e28b46c1d2d25994b771f4d34c7b3 +size 72284496 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0662f23b7658d99dfd2c149188cb261dd9f1443e --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2b10b6a1803b8fb1a569724dfdc796f6fb2ad631fb09bd2bb806a9888e149fa4 +size 72284496 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8d379daba2e111af91d2e62becb01d8a85b50df --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daab077403666e3a92ffb4c2bd7bb524582c8283b11bad99ea70cf8b1846669c +size 72284496 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..992eefdc5769834ef711719862c3dc0d39a4d09c --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0bd0d47573091ceb38c33a695e1d2af0438c7b03ae606b5f462c43cc605928d +size 72284496 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e37f83727567f0ccd789c76f492b48f419c0dc3e --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e858ead0e5ac4fd735a5583d3e23b3cdd1a34afec3f8b34f470d2b12f6f1859 +size 72284496 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5af905b1a4acec682b55346fbdfcbf2b9d4b3093 --- /dev/null +++ 
b/checkpoint-500/global_step500/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82006158cbce0e4ebfbf43ed022c3787291883181b36c657578e054721a7fe24 +size 72284496 diff --git a/checkpoint-500/global_step500/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eef365b35ef1b1754276bc9b1258b9a74e4543a8 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d7caa2cfe96a19abe685332467ac22b115ed252a21002a3b943261be20b3c3 +size 443182 diff --git a/checkpoint-500/global_step500/zero_pp_rank_1_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a0c18d24dc4728fbbe4d171cc75915709a4bed5 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b032521778e7a6469968c4a290ce965e63a1e18bab75fdf606708ffe514da24 +size 443182 diff --git a/checkpoint-500/global_step500/zero_pp_rank_2_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb654659d12df45da1211b372dc5b6a52f6bf910 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a44e80565fb17280521240d533dd1850268f8ab1d3c55d75436a71c4b95064ae +size 443182 diff --git a/checkpoint-500/global_step500/zero_pp_rank_3_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..c04e4f91ebb6dc835c2db92ead20bf10e812737b --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59aed78c2748326a2290be6caa8caa6714b5ab6f69d51659c8bb1f419a20af99 +size 443182 diff --git a/checkpoint-500/global_step500/zero_pp_rank_4_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff5222508ad9b86f626f20316bfd325630db690b --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:931e1b3d418454ab8bbdaffa4389c77fb59b21b263cf1f474ce535ac9854f1ce +size 443182 diff --git a/checkpoint-500/global_step500/zero_pp_rank_5_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c2371d5603d4b23d5098cdf74190dcfdd5d9df0 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d45a5f3d803db28860cf8a51cf70de14dcc262509ddcbedb0194f679502f2fd2 +size 443182 diff --git a/checkpoint-500/global_step500/zero_pp_rank_6_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..59500115e4f730a66c05850cfc94b82a24418a09 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe56399d7fa521c93a482b949f8808230893f70ad4f32edd892d88620e327cf +size 443182 diff --git a/checkpoint-500/latest b/checkpoint-500/latest new file mode 100644 index 
0000000000000000000000000000000000000000..f0b47ce15fff9a01b2a416a473b2148085048a50 --- /dev/null +++ b/checkpoint-500/latest @@ -0,0 +1 @@ +global_step500 \ No newline at end of file diff --git a/checkpoint-500/rng_state_0.pth b/checkpoint-500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..31f56865b075f09edf44f48584720221a7423d97 --- /dev/null +++ b/checkpoint-500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25b6cf3f5eaa06755e1d9600807aa5b55661c173b01cf99c350bfe2e4608e802 +size 15728 diff --git a/checkpoint-500/rng_state_1.pth b/checkpoint-500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae5e44d4cc1b4ec1437bf5d9f09eee8edeab3fff --- /dev/null +++ b/checkpoint-500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb01ba7ec4a564b7d495054c0991f78ed817a0698505c7ea5b388deab23ea65 +size 15728 diff --git a/checkpoint-500/rng_state_2.pth b/checkpoint-500/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..3cedb33a9894044f400ce3c638037f85021fb8d9 --- /dev/null +++ b/checkpoint-500/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53423892130294e5e95d150e57e53ad7fed918ca69d9f1abc9f0057232f7955d +size 15728 diff --git a/checkpoint-500/rng_state_3.pth b/checkpoint-500/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c4dc6834b781945424d88ae679398e1e2a3de6e --- /dev/null +++ b/checkpoint-500/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3277ffd505783253dcec3ca71696632663f1e98189d7d116d2337cd55d4aeaee +size 15792 diff --git a/checkpoint-500/rng_state_4.pth b/checkpoint-500/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..efd7f81986a1bdc267170c00c9825128f058d19c --- /dev/null +++ b/checkpoint-500/rng_state_4.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:79e002a2d1d3543136286242900eedabe21c6474bf2b47bee2280c906737e44c +size 15728 diff --git a/checkpoint-500/rng_state_5.pth b/checkpoint-500/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..4f6ad000fd51afcba686dd3a6d1733652ed9e261 --- /dev/null +++ b/checkpoint-500/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65cfe05278d2e7d3f949bb28a5342832df4d1c3ea284e9bd75699a36fa0ecddd +size 15728 diff --git a/checkpoint-500/rng_state_6.pth b/checkpoint-500/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..a54668173ed1435c90e36672f7e479a115bb842c --- /dev/null +++ b/checkpoint-500/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2157897c20adc8d7931b09d638bdc52de1145ecfe49388edb0268364d27b3c71 +size 15728 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff99257a6b6fbda87c7856e6436e4662ef8399f1 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6033745cbf7df26345e0d7a47c38811e94fda2c2a704175b124d3848c4d4995e +size 1064 diff --git a/checkpoint-500/special_tokens_map.json b/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json new file mode 100644 index 
0000000000000000000000000000000000000000..92cc72bfcc2faff4ba96750b21c7d2e3cb92d25c --- /dev/null +++ b/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ff5472d095ccd9332d9e723153d7bc7226cb6be9c1bffda738b5ba2e71bf26 +size 17210084 diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ae1bc63bd6e5ca8a863628311061c143679ff93 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true 
+ }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": 
"<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": 
"<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": 
"<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set 
tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + 
"extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ccae6dc708bcf590de4430cf85d28c100ed90e88 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,5284 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.423728813559322, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio": 0.0, + "completion_length": 396.3571472167969, + "epoch": 0.000847457627118644, + "grad_norm": 0.028597827622128653, + "learning_rate": 1.6949152542372883e-07, + "loss": 0.0096, + "num_tokens": 29860.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 1 + }, + { + "clip_ratio": 0.0, + "epoch": 0.001694915254237288, + "grad_norm": 0.0283861264343528, + "learning_rate": 3.3898305084745766e-07, + "loss": 0.0096, + "step": 2 + }, + { + "clip_ratio": 0.0005210353410802782, + "epoch": 0.002542372881355932, + "grad_norm": 0.024416377206652233, + "learning_rate": 5.084745762711865e-07, + "loss": 0.0095, + "step": 3 + }, + { + "clip_ratio": 0.0003804714942816645, + "epoch": 0.003389830508474576, + "grad_norm": 0.024954590093213137, + "learning_rate": 6.779661016949153e-07, + "loss": 0.0096, + "step": 4 + }, + { + "clip_ratio": 0.00028131139697507024, + "completion_length": 
477.6250305175781, + "epoch": 0.00423728813559322, + "grad_norm": 0.0, + "learning_rate": 8.474576271186441e-07, + "loss": 0.0, + "num_tokens": 64207.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 5 + }, + { + "clip_ratio": 0.00026464727125130594, + "epoch": 0.005084745762711864, + "grad_norm": 0.0, + "learning_rate": 1.016949152542373e-06, + "loss": 0.0, + "step": 6 + }, + { + "clip_ratio": 0.0003427764168009162, + "epoch": 0.005932203389830509, + "grad_norm": 0.0, + "learning_rate": 1.186440677966102e-06, + "loss": 0.0, + "step": 7 + }, + { + "clip_ratio": 0.0003427252813708037, + "epoch": 0.006779661016949152, + "grad_norm": 0.0, + "learning_rate": 1.3559322033898307e-06, + "loss": 0.0, + "step": 8 + }, + { + "clip_ratio": 0.0003535364812705666, + "completion_length": 503.14288330078125, + "epoch": 0.007627118644067797, + "grad_norm": 0.0, + "learning_rate": 1.5254237288135596e-06, + "loss": 0.0, + "num_tokens": 99207.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 9 + }, + { + "clip_ratio": 0.00017467686848249286, + "epoch": 0.00847457627118644, + "grad_norm": 0.0, + "learning_rate": 1.6949152542372882e-06, + "loss": 0.0, + "step": 10 + }, + { + "clip_ratio": 0.0002140275464626029, + "epoch": 0.009322033898305085, + "grad_norm": 0.0, + "learning_rate": 1.8644067796610171e-06, + "loss": 0.0, + "step": 11 + }, + { + "clip_ratio": 0.00035844597732648253, + "epoch": 0.010169491525423728, + 
"grad_norm": 0.0, + "learning_rate": 2.033898305084746e-06, + "loss": 0.0, + "step": 12 + }, + { + "clip_ratio": 0.00035540881799533963, + "completion_length": 471.83929443359375, + "epoch": 0.011016949152542373, + "grad_norm": 0.0, + "learning_rate": 2.203389830508475e-06, + "loss": 0.0, + "num_tokens": 132582.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 13 + }, + { + "clip_ratio": 0.0002507771132513881, + "epoch": 0.011864406779661017, + "grad_norm": 0.0, + "learning_rate": 2.372881355932204e-06, + "loss": 0.0, + "step": 14 + }, + { + "clip_ratio": 0.0001079499488696456, + "epoch": 0.012711864406779662, + "grad_norm": 0.0, + "learning_rate": 2.5423728813559323e-06, + "loss": 0.0, + "step": 15 + }, + { + "clip_ratio": 0.00021258163906168193, + "epoch": 0.013559322033898305, + "grad_norm": 0.0, + "learning_rate": 2.7118644067796613e-06, + "loss": 0.0, + "step": 16 + }, + { + "clip_ratio": 0.000322989042615518, + "completion_length": 387.14288330078125, + "epoch": 0.01440677966101695, + "grad_norm": 0.016452011518392446, + "learning_rate": 2.8813559322033903e-06, + "loss": 0.0658, + "num_tokens": 161406.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 17 + }, + { + "clip_ratio": 0.00034964055521413684, + "epoch": 0.015254237288135594, + "grad_norm": 0.017719451531367687, + "learning_rate": 3.0508474576271192e-06, + "loss": 0.0657, + 
"step": 18 + }, + { + "clip_ratio": 0.0004103984101675451, + "epoch": 0.016101694915254237, + "grad_norm": 0.016469439956852048, + "learning_rate": 3.2203389830508473e-06, + "loss": 0.0657, + "step": 19 + }, + { + "clip_ratio": 0.0003408819029573351, + "epoch": 0.01694915254237288, + "grad_norm": 0.017326107824003897, + "learning_rate": 3.3898305084745763e-06, + "loss": 0.0657, + "step": 20 + }, + { + "clip_ratio": 0.00046000577276572585, + "completion_length": 481.732177734375, + "epoch": 0.017796610169491526, + "grad_norm": 0.0, + "learning_rate": 3.5593220338983053e-06, + "loss": 0.0, + "num_tokens": 195711.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 21 + }, + { + "clip_ratio": 0.00042848457815125585, + "epoch": 0.01864406779661017, + "grad_norm": 0.0, + "learning_rate": 3.7288135593220342e-06, + "loss": 0.0, + "step": 22 + }, + { + "clip_ratio": 0.0004297326668165624, + "epoch": 0.019491525423728815, + "grad_norm": 0.0, + "learning_rate": 3.898305084745763e-06, + "loss": 0.0, + "step": 23 + }, + { + "clip_ratio": 0.000281251355772838, + "epoch": 0.020338983050847456, + "grad_norm": 0.0, + "learning_rate": 4.067796610169492e-06, + "loss": 0.0, + "step": 24 + }, + { + "clip_ratio": 0.00017563004803378135, + "completion_length": 442.7500305175781, + "epoch": 0.0211864406779661, + "grad_norm": 0.11157048303951664, + "learning_rate": 4.23728813559322e-06, + "loss": 0.0104, + "num_tokens": 227185.0, + "reward": -0.8214285969734192, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 
0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 25 + }, + { + "clip_ratio": 0.00010569583537289873, + "epoch": 0.022033898305084745, + "grad_norm": 0.12213723346474271, + "learning_rate": 4.40677966101695e-06, + "loss": 0.0104, + "step": 26 + }, + { + "clip_ratio": 0.0005364188691601157, + "epoch": 0.02288135593220339, + "grad_norm": 0.11319483991164629, + "learning_rate": 4.576271186440678e-06, + "loss": 0.0106, + "step": 27 + }, + { + "clip_ratio": 0.0010358322178944945, + "epoch": 0.023728813559322035, + "grad_norm": 0.10119136649790463, + "learning_rate": 4.745762711864408e-06, + "loss": 0.0101, + "step": 28 + }, + { + "clip_ratio": 0.0002854761842172593, + "completion_length": 420.51788330078125, + "epoch": 0.02457627118644068, + "grad_norm": 0.0, + "learning_rate": 4.915254237288136e-06, + "loss": 0.0, + "num_tokens": 257614.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 29 + }, + { + "clip_ratio": 0.00021371705224737525, + "epoch": 0.025423728813559324, + "grad_norm": 0.0, + "learning_rate": 5.084745762711865e-06, + "loss": 0.0, + "step": 30 + }, + { + "clip_ratio": 0.00016422003682237118, + "epoch": 0.026271186440677965, + "grad_norm": 0.0, + "learning_rate": 5.254237288135594e-06, + "loss": 0.0, + "step": 31 + }, + { + "clip_ratio": 0.000256577244726941, + "epoch": 0.02711864406779661, + "grad_norm": 0.0, + "learning_rate": 5.423728813559323e-06, + "loss": 0.0, + "step": 32 + }, + { + "clip_ratio": 0.00045646229409612715, + "completion_length": 465.1250305175781, + "epoch": 0.027966101694915254, + "grad_norm": 0.017873238036622066, + "learning_rate": 5.593220338983051e-06, + "loss": 0.0246, + "num_tokens": 
290581.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 33 + }, + { + "clip_ratio": 0.0006314113852567971, + "epoch": 0.0288135593220339, + "grad_norm": 0.01732638271233714, + "learning_rate": 5.7627118644067805e-06, + "loss": 0.0247, + "step": 34 + }, + { + "clip_ratio": 0.00045800459338352084, + "epoch": 0.029661016949152543, + "grad_norm": 0.017593288926627842, + "learning_rate": 5.932203389830509e-06, + "loss": 0.0247, + "step": 35 + }, + { + "clip_ratio": 0.0004213759966660291, + "epoch": 0.030508474576271188, + "grad_norm": 0.017758527483606314, + "learning_rate": 6.1016949152542385e-06, + "loss": 0.0247, + "step": 36 + }, + { + "clip_ratio": 0.00027920620050281286, + "completion_length": 487.982177734375, + "epoch": 0.03135593220338983, + "grad_norm": 0.017492673426871806, + "learning_rate": 6.271186440677966e-06, + "loss": 0.0287, + "num_tokens": 325036.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 37 + }, + { + "clip_ratio": 0.0003654122701846063, + "epoch": 0.03220338983050847, + "grad_norm": 0.016942624524485753, + "learning_rate": 6.440677966101695e-06, + "loss": 0.0287, + "step": 38 + }, + { + "clip_ratio": 0.0002445173158776015, + "epoch": 0.03305084745762712, + "grad_norm": 0.017357366453315624, + "learning_rate": 6.610169491525424e-06, + "loss": 0.0287, + 
"step": 39 + }, + { + "clip_ratio": 0.00027939456049352884, + "epoch": 0.03389830508474576, + "grad_norm": 0.017497160548341977, + "learning_rate": 6.779661016949153e-06, + "loss": 0.0287, + "step": 40 + }, + { + "clip_ratio": 0.00030169120873324573, + "completion_length": 337.76788330078125, + "epoch": 0.03474576271186441, + "grad_norm": 0.013386997712677729, + "learning_rate": 6.949152542372882e-06, + "loss": 0.0194, + "num_tokens": 351879.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 41 + }, + { + "clip_ratio": 0.000481679366203025, + "epoch": 0.03559322033898305, + "grad_norm": 0.013534365829241167, + "learning_rate": 7.1186440677966106e-06, + "loss": 0.0194, + "step": 42 + }, + { + "clip_ratio": 0.0006071141688153148, + "epoch": 0.036440677966101696, + "grad_norm": 0.013688658779614732, + "learning_rate": 7.288135593220339e-06, + "loss": 0.0193, + "step": 43 + }, + { + "clip_ratio": 0.0005443710251711309, + "epoch": 0.03728813559322034, + "grad_norm": 0.013415623466192152, + "learning_rate": 7.4576271186440685e-06, + "loss": 0.0194, + "step": 44 + }, + { + "clip_ratio": 0.00027171947294846177, + "completion_length": 358.6964416503906, + "epoch": 0.038135593220338986, + "grad_norm": 0.0, + "learning_rate": 7.627118644067797e-06, + "loss": 0.0, + "num_tokens": 379414.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 45 + }, + { + 
"clip_ratio": 0.00027013494400307536, + "epoch": 0.03898305084745763, + "grad_norm": 0.0, + "learning_rate": 7.796610169491526e-06, + "loss": 0.0, + "step": 46 + }, + { + "clip_ratio": 0.00023684222833253443, + "epoch": 0.03983050847457627, + "grad_norm": 0.0, + "learning_rate": 7.966101694915255e-06, + "loss": 0.0, + "step": 47 + }, + { + "clip_ratio": 0.0004315820406191051, + "epoch": 0.04067796610169491, + "grad_norm": 0.0, + "learning_rate": 8.135593220338983e-06, + "loss": 0.0, + "step": 48 + }, + { + "clip_ratio": 0.00034640118246898055, + "completion_length": 392.46429443359375, + "epoch": 0.04152542372881356, + "grad_norm": 0.05155975490631469, + "learning_rate": 8.305084745762712e-06, + "loss": -0.023, + "num_tokens": 408424.0, + "reward": -0.8571429252624512, + "reward_std": 0.24888646602630615, + "rewards/check_winston_local_func/mean": -0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 49 + }, + { + "clip_ratio": 0.00034579477505758405, + "epoch": 0.0423728813559322, + "grad_norm": 0.051568368553185584, + "learning_rate": 8.47457627118644e-06, + "loss": -0.0233, + "step": 50 + }, + { + "clip_ratio": 0.0005872369511052966, + "epoch": 0.043220338983050846, + "grad_norm": 0.054569986775825835, + "learning_rate": 8.64406779661017e-06, + "loss": -0.0235, + "step": 51 + }, + { + "clip_ratio": 0.00048618926666677, + "epoch": 0.04406779661016949, + "grad_norm": 0.05573624590215382, + "learning_rate": 8.8135593220339e-06, + "loss": -0.0236, + "step": 52 + }, + { + "clip_ratio": 0.000333156727720052, + "completion_length": 485.7500305175781, + "epoch": 0.044915254237288135, + "grad_norm": 0.0, + "learning_rate": 8.983050847457628e-06, + "loss": 0.0, + "num_tokens": 442986.0, + "reward": -1.0, + "reward_std": 0.0, + 
"rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 53 + }, + { + "clip_ratio": 0.00042045177542604506, + "epoch": 0.04576271186440678, + "grad_norm": 0.0, + "learning_rate": 9.152542372881356e-06, + "loss": 0.0, + "step": 54 + }, + { + "clip_ratio": 0.00031678256345912814, + "epoch": 0.046610169491525424, + "grad_norm": 0.0, + "learning_rate": 9.322033898305085e-06, + "loss": 0.0, + "step": 55 + }, + { + "clip_ratio": 0.00010463170474395156, + "epoch": 0.04745762711864407, + "grad_norm": 0.0, + "learning_rate": 9.491525423728815e-06, + "loss": 0.0, + "step": 56 + }, + { + "clip_ratio": 0.0007074553286656737, + "completion_length": 428.3214416503906, + "epoch": 0.048305084745762714, + "grad_norm": 0.04153528214569023, + "learning_rate": 9.661016949152544e-06, + "loss": 0.0343, + "num_tokens": 473892.0, + "reward": -0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 57 + }, + { + "clip_ratio": 0.0004013319849036634, + "epoch": 0.04915254237288136, + "grad_norm": 0.04657277213309362, + "learning_rate": 9.830508474576272e-06, + "loss": 0.0342, + "step": 58 + }, + { + "clip_ratio": 0.00044179416727274656, + "epoch": 0.05, + "grad_norm": 0.045153415468062494, + "learning_rate": 1e-05, + "loss": 0.0343, + "step": 59 + }, + { + "clip_ratio": 0.0007794442353770137, + "epoch": 0.05084745762711865, + "grad_norm": 0.035363902861678634, + "learning_rate": 1.016949152542373e-05, + "loss": 0.0339, + "step": 60 + }, 
+ { + "clip_ratio": 0.00021712151647079736, + "completion_length": 299.8035888671875, + "epoch": 0.051694915254237285, + "grad_norm": 0.07205399219848665, + "learning_rate": 1.0338983050847458e-05, + "loss": 0.0477, + "num_tokens": 497465.0, + "reward": -0.785714328289032, + "reward_std": 0.28498074412345886, + "rewards/check_winston_local_func/mean": -0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241877675056458, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 61 + }, + { + "clip_ratio": 0.0002563712769187987, + "epoch": 0.05254237288135593, + "grad_norm": 0.07155354465871978, + "learning_rate": 1.0508474576271188e-05, + "loss": 0.0475, + "step": 62 + }, + { + "clip_ratio": 0.0001442718057660386, + "epoch": 0.053389830508474574, + "grad_norm": 0.07289445064494822, + "learning_rate": 1.0677966101694917e-05, + "loss": 0.0474, + "step": 63 + }, + { + "clip_ratio": 0.001116903149522841, + "epoch": 0.05423728813559322, + "grad_norm": 0.06596181254777028, + "learning_rate": 1.0847457627118645e-05, + "loss": 0.0468, + "step": 64 + }, + { + "clip_ratio": 0.00027901786961592734, + "completion_length": 480.4464416503906, + "epoch": 0.05508474576271186, + "grad_norm": 0.0, + "learning_rate": 1.1016949152542374e-05, + "loss": 0.0, + "num_tokens": 532266.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 65 + }, + { + "clip_ratio": 0.00037270825123414397, + "epoch": 0.05593220338983051, + "grad_norm": 0.0, + "learning_rate": 1.1186440677966102e-05, + "loss": 0.0, + "step": 66 + }, + { + "clip_ratio": 0.0006563978386111557, + 
"epoch": 0.05677966101694915, + "grad_norm": 0.0, + "learning_rate": 1.1355932203389833e-05, + "loss": 0.0, + "step": 67 + }, + { + "clip_ratio": 0.0008186621707864106, + "epoch": 0.0576271186440678, + "grad_norm": 0.0, + "learning_rate": 1.1525423728813561e-05, + "loss": 0.0, + "step": 68 + }, + { + "clip_ratio": 0.0005370522267185152, + "completion_length": 420.3214416503906, + "epoch": 0.05847457627118644, + "grad_norm": 0.0, + "learning_rate": 1.169491525423729e-05, + "loss": 0.0, + "num_tokens": 563380.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 69 + }, + { + "clip_ratio": 0.0007551547605544329, + "epoch": 0.059322033898305086, + "grad_norm": 0.0, + "learning_rate": 1.1864406779661018e-05, + "loss": 0.0, + "step": 70 + }, + { + "clip_ratio": 0.0004996137577109039, + "epoch": 0.06016949152542373, + "grad_norm": 0.0, + "learning_rate": 1.2033898305084745e-05, + "loss": 0.0, + "step": 71 + }, + { + "clip_ratio": 0.0007176484214141965, + "epoch": 0.061016949152542375, + "grad_norm": 0.0, + "learning_rate": 1.2203389830508477e-05, + "loss": 0.0, + "step": 72 + }, + { + "clip_ratio": 0.0004170738684479147, + "completion_length": 383.6964416503906, + "epoch": 0.06186440677966102, + "grad_norm": 0.01481240616851262, + "learning_rate": 1.2372881355932205e-05, + "loss": 0.0412, + "num_tokens": 592003.0, + "reward": -0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, 
+ "step": 73 + }, + { + "clip_ratio": 0.0008365331450477242, + "epoch": 0.06271186440677966, + "grad_norm": 0.01522897212214854, + "learning_rate": 1.2542372881355932e-05, + "loss": 0.0411, + "step": 74 + }, + { + "clip_ratio": 0.000981268472969532, + "epoch": 0.0635593220338983, + "grad_norm": 0.014948882448171377, + "learning_rate": 1.2711864406779661e-05, + "loss": 0.0411, + "step": 75 + }, + { + "clip_ratio": 0.0006704007391817868, + "epoch": 0.06440677966101695, + "grad_norm": 0.015045917131498382, + "learning_rate": 1.288135593220339e-05, + "loss": 0.041, + "step": 76 + }, + { + "clip_ratio": 0.00022424904454965144, + "completion_length": 437.9821472167969, + "epoch": 0.06525423728813559, + "grad_norm": 0.030968041587588573, + "learning_rate": 1.305084745762712e-05, + "loss": 0.0453, + "num_tokens": 623050.0, + "reward": -0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 77 + }, + { + "clip_ratio": 0.00053448136895895, + "epoch": 0.06610169491525424, + "grad_norm": 0.02976001587219013, + "learning_rate": 1.3220338983050848e-05, + "loss": 0.0453, + "step": 78 + }, + { + "clip_ratio": 0.0010130176087841392, + "epoch": 0.06694915254237288, + "grad_norm": 0.02743385432574901, + "learning_rate": 1.3389830508474577e-05, + "loss": 0.045, + "step": 79 + }, + { + "clip_ratio": 0.0011749044060707092, + "epoch": 0.06779661016949153, + "grad_norm": 0.025462048937107604, + "learning_rate": 1.3559322033898305e-05, + "loss": 0.045, + "step": 80 + }, + { + "clip_ratio": 0.001996266655623913, + "completion_length": 382.2321472167969, + "epoch": 0.06864406779661017, + "grad_norm": 0.13457631329414246, + "learning_rate": 1.3728813559322034e-05, + 
"loss": 0.0135, + "num_tokens": 651839.0, + "reward": -0.6785714626312256, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": -0.6785714030265808, + "rewards/check_winston_local_func/std": 0.7411819696426392, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 81 + }, + { + "clip_ratio": 0.003203267464414239, + "epoch": 0.06949152542372881, + "grad_norm": 0.11807541511453928, + "learning_rate": 1.3898305084745764e-05, + "loss": 0.0128, + "step": 82 + }, + { + "clip_ratio": 0.011069249361753464, + "epoch": 0.07033898305084746, + "grad_norm": 0.0768781703261771, + "learning_rate": 1.4067796610169493e-05, + "loss": 0.0118, + "step": 83 + }, + { + "clip_ratio": 0.013229678384959698, + "epoch": 0.0711864406779661, + "grad_norm": 0.07925229229917279, + "learning_rate": 1.4237288135593221e-05, + "loss": 0.011, + "step": 84 + }, + { + "clip_ratio": 0.0002107896434608847, + "completion_length": 397.1964416503906, + "epoch": 0.07203389830508475, + "grad_norm": 0.0461083173277337, + "learning_rate": 1.440677966101695e-05, + "loss": 0.0389, + "num_tokens": 681218.0, + "reward": -0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 85 + }, + { + "clip_ratio": 0.0010596371721476316, + "epoch": 0.07288135593220339, + "grad_norm": 0.04449467794694347, + "learning_rate": 1.4576271186440678e-05, + "loss": 0.0384, + "step": 86 + }, + { + "clip_ratio": 0.002870997181162238, + "epoch": 0.07372881355932204, + "grad_norm": 0.038978879976910054, + "learning_rate": 1.4745762711864408e-05, + 
"loss": 0.038, + "step": 87 + }, + { + "clip_ratio": 0.006624125875532627, + "epoch": 0.07457627118644068, + "grad_norm": 0.0364842012372814, + "learning_rate": 1.4915254237288137e-05, + "loss": 0.0377, + "step": 88 + }, + { + "clip_ratio": 0.00043057286529801786, + "completion_length": 399.64288330078125, + "epoch": 0.07542372881355933, + "grad_norm": 0.014090924578944663, + "learning_rate": 1.5084745762711865e-05, + "loss": 0.0328, + "num_tokens": 711078.0, + "reward": -0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": -0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 89 + }, + { + "clip_ratio": 0.0018296982161700726, + "epoch": 0.07627118644067797, + "grad_norm": 0.014531205963070252, + "learning_rate": 1.5254237288135594e-05, + "loss": 0.0328, + "step": 90 + }, + { + "clip_ratio": 0.004530549980700016, + "epoch": 0.07711864406779662, + "grad_norm": 0.014754831265979268, + "learning_rate": 1.5423728813559326e-05, + "loss": 0.0327, + "step": 91 + }, + { + "clip_ratio": 0.008132151328027248, + "epoch": 0.07796610169491526, + "grad_norm": 0.014608619166449479, + "learning_rate": 1.5593220338983053e-05, + "loss": 0.0326, + "step": 92 + }, + { + "clip_ratio": 0.0007373582920990884, + "completion_length": 467.71429443359375, + "epoch": 0.0788135593220339, + "grad_norm": 0.041297580984419976, + "learning_rate": 1.576271186440678e-05, + "loss": 0.0616, + "num_tokens": 745862.0, + "reward": -0.8928571939468384, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 93 + }, + { + "clip_ratio": 0.001287531922571361, + "epoch": 0.07966101694915254, + "grad_norm": 0.030858648065290283, + "learning_rate": 1.593220338983051e-05, + "loss": 0.0614, + "step": 94 + }, + { + "clip_ratio": 0.0023924303241074085, + "epoch": 0.08050847457627118, + "grad_norm": 0.03463914321182917, + "learning_rate": 1.6101694915254237e-05, + "loss": 0.0613, + "step": 95 + }, + { + "clip_ratio": 0.00350037869066, + "epoch": 0.08135593220338982, + "grad_norm": 0.02665011286164521, + "learning_rate": 1.6271186440677967e-05, + "loss": 0.0611, + "step": 96 + }, + { + "clip_ratio": 0.0006918495637364686, + "completion_length": 320.75, + "epoch": 0.08220338983050847, + "grad_norm": 0.06373891470490567, + "learning_rate": 1.6440677966101697e-05, + "loss": -0.015, + "num_tokens": 771576.0, + "reward": -0.7500000596046448, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 97 + }, + { + "clip_ratio": 0.0029753418639302254, + "epoch": 0.08305084745762711, + "grad_norm": 0.05523249333421511, + "learning_rate": 1.6610169491525424e-05, + "loss": -0.0157, + "step": 98 + }, + { + "clip_ratio": 0.00716389948502183, + "epoch": 0.08389830508474576, + "grad_norm": 0.04924083222576615, + "learning_rate": 1.6779661016949154e-05, + "loss": -0.0158, + "step": 99 + }, + { + "clip_ratio": 0.011036296375095844, + "epoch": 0.0847457627118644, + "grad_norm": 0.04955323333773024, + "learning_rate": 1.694915254237288e-05, + "loss": -0.0163, + "step": 100 + }, + { + "clip_ratio": 0.00038607188616879284, + "completion_length": 507.2500305175781, + "epoch": 0.08559322033898305, + "grad_norm": 
0.0, + "learning_rate": 1.711864406779661e-05, + "loss": 0.0, + "num_tokens": 807230.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 101 + }, + { + "clip_ratio": 0.0004233713843859732, + "epoch": 0.08644067796610169, + "grad_norm": 0.0, + "learning_rate": 1.728813559322034e-05, + "loss": 0.0, + "step": 102 + }, + { + "clip_ratio": 0.0005304253427311778, + "epoch": 0.08728813559322034, + "grad_norm": 0.0, + "learning_rate": 1.745762711864407e-05, + "loss": 0.0, + "step": 103 + }, + { + "clip_ratio": 0.0008094432414509356, + "epoch": 0.08813559322033898, + "grad_norm": 0.0, + "learning_rate": 1.76271186440678e-05, + "loss": 0.0, + "step": 104 + }, + { + "clip_ratio": 0.0003136220038868487, + "completion_length": 309.4821472167969, + "epoch": 0.08898305084745763, + "grad_norm": 0.1215376293190595, + "learning_rate": 1.7796610169491526e-05, + "loss": 0.059, + "num_tokens": 830873.0, + "reward": -0.6071428656578064, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": -0.6071428656578064, + "rewards/check_winston_local_func/std": 0.8017837405204773, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 105 + }, + { + "clip_ratio": 0.005680752452462912, + "epoch": 0.08983050847457627, + "grad_norm": 0.08882976004122672, + "learning_rate": 1.7966101694915256e-05, + "loss": 0.057, + "step": 106 + }, + { + "clip_ratio": 0.013865095563232899, + "epoch": 0.09067796610169492, + "grad_norm": 0.07178187465318808, + "learning_rate": 1.8135593220338986e-05, + "loss": 0.0551, + "step": 107 + }, + { + "clip_ratio": 
0.025337526574730873, + "epoch": 0.09152542372881356, + "grad_norm": 0.05889114052835241, + "learning_rate": 1.8305084745762713e-05, + "loss": 0.054, + "step": 108 + }, + { + "clip_ratio": 0.0004973930190317333, + "completion_length": 309.2857360839844, + "epoch": 0.0923728813559322, + "grad_norm": 0.10159993090017184, + "learning_rate": 1.8474576271186443e-05, + "loss": 0.1029, + "num_tokens": 856689.0, + "reward": -0.7500000596046448, + "reward_std": 0.4123912453651428, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 109 + }, + { + "clip_ratio": 0.005504293367266655, + "epoch": 0.09322033898305085, + "grad_norm": 0.09390182129772277, + "learning_rate": 1.864406779661017e-05, + "loss": 0.1017, + "step": 110 + }, + { + "clip_ratio": 0.022907190024852753, + "epoch": 0.0940677966101695, + "grad_norm": 0.08701453983072766, + "learning_rate": 1.88135593220339e-05, + "loss": 0.0999, + "step": 111 + }, + { + "clip_ratio": 0.04514092579483986, + "epoch": 0.09491525423728814, + "grad_norm": 0.08477253768734147, + "learning_rate": 1.898305084745763e-05, + "loss": 0.0987, + "step": 112 + }, + { + "clip_ratio": 0.0005664547788910568, + "completion_length": 434.39288330078125, + "epoch": 0.09576271186440678, + "grad_norm": 0.0, + "learning_rate": 1.9152542372881357e-05, + "loss": 0.0, + "num_tokens": 888255.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 113 + }, + { + "clip_ratio": 0.0015907255001366138, + "epoch": 
0.09661016949152543, + "grad_norm": 0.0, + "learning_rate": 1.9322033898305087e-05, + "loss": 0.0, + "step": 114 + }, + { + "clip_ratio": 0.003365863347426057, + "epoch": 0.09745762711864407, + "grad_norm": 0.0, + "learning_rate": 1.9491525423728814e-05, + "loss": 0.0, + "step": 115 + }, + { + "clip_ratio": 0.006915883626788855, + "epoch": 0.09830508474576272, + "grad_norm": 0.0, + "learning_rate": 1.9661016949152545e-05, + "loss": 0.0, + "step": 116 + }, + { + "clip_ratio": 0.0015928384382277727, + "completion_length": 311.08929443359375, + "epoch": 0.09915254237288136, + "grad_norm": 0.1669528890016949, + "learning_rate": 1.9830508474576275e-05, + "loss": 0.0592, + "num_tokens": 912948.0, + "reward": -0.785714328289032, + "reward_std": 0.28498074412345886, + "rewards/check_winston_local_func/mean": -0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241877675056458, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 117 + }, + { + "clip_ratio": 0.006070761010050774, + "epoch": 0.1, + "grad_norm": 0.15701074253607375, + "learning_rate": 2e-05, + "loss": 0.056, + "step": 118 + }, + { + "clip_ratio": 0.03282368928194046, + "epoch": 0.10084745762711865, + "grad_norm": 0.21942626154682726, + "learning_rate": 2.016949152542373e-05, + "loss": 0.0526, + "step": 119 + }, + { + "clip_ratio": 0.0628986731171608, + "epoch": 0.1016949152542373, + "grad_norm": 0.1568339023062343, + "learning_rate": 2.033898305084746e-05, + "loss": 0.0497, + "step": 120 + }, + { + "clip_ratio": 0.0003240547957830131, + "completion_length": 490.607177734375, + "epoch": 0.10254237288135593, + "grad_norm": 0.0, + "learning_rate": 2.0508474576271186e-05, + "loss": 0.0, + "num_tokens": 947318.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": -1.0, + "rewards/check_winston_local_func/std": 0.0, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 121 + }, + { + "clip_ratio": 0.00037375700776465237, + "epoch": 0.10338983050847457, + "grad_norm": 0.0, + "learning_rate": 2.0677966101694916e-05, + "loss": 0.0, + "step": 122 + }, + { + "clip_ratio": 0.0011371899163350463, + "epoch": 0.10423728813559321, + "grad_norm": 0.0, + "learning_rate": 2.084745762711865e-05, + "loss": 0.0, + "step": 123 + }, + { + "clip_ratio": 0.0022452734410762787, + "epoch": 0.10508474576271186, + "grad_norm": 0.0, + "learning_rate": 2.1016949152542376e-05, + "loss": 0.0, + "step": 124 + }, + { + "clip_ratio": 0.004924725275486708, + "completion_length": 324.58929443359375, + "epoch": 0.1059322033898305, + "grad_norm": 0.3997089536055672, + "learning_rate": 2.1186440677966103e-05, + "loss": 0.04, + "num_tokens": 972527.0, + "reward": -0.8214285969734192, + "reward_std": 0.36553531885147095, + "rewards/check_winston_local_func/mean": -0.8214285969734192, + "rewards/check_winston_local_func/std": 0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 125 + }, + { + "clip_ratio": 0.036066196858882904, + "epoch": 0.10677966101694915, + "grad_norm": 0.4003737832223874, + "learning_rate": 2.1355932203389833e-05, + "loss": 0.0371, + "step": 126 + }, + { + "clip_ratio": 0.06804865598678589, + "epoch": 0.10762711864406779, + "grad_norm": 0.3262616499772286, + "learning_rate": 2.152542372881356e-05, + "loss": 0.0328, + "step": 127 + }, + { + "clip_ratio": 0.08261267095804214, + "epoch": 0.10847457627118644, + "grad_norm": 0.19475445080797668, + "learning_rate": 2.169491525423729e-05, + "loss": 0.0284, + "step": 128 + }, + { + "clip_ratio": 0.00042747953557409346, + "completion_length": 
441.6785888671875, + "epoch": 0.10932203389830508, + "grad_norm": 0.07121815374577634, + "learning_rate": 2.1864406779661017e-05, + "loss": 0.0215, + "num_tokens": 1005157.0, + "reward": -0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": -0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 129 + }, + { + "clip_ratio": 0.0005996564286760986, + "epoch": 0.11016949152542373, + "grad_norm": 0.07374447574020743, + "learning_rate": 2.2033898305084748e-05, + "loss": 0.021, + "step": 130 + }, + { + "clip_ratio": 0.0070611475966870785, + "epoch": 0.11101694915254237, + "grad_norm": 0.0484843694410488, + "learning_rate": 2.2203389830508474e-05, + "loss": 0.02, + "step": 131 + }, + { + "clip_ratio": 0.02419929951429367, + "epoch": 0.11186440677966102, + "grad_norm": 0.03734227928764934, + "learning_rate": 2.2372881355932205e-05, + "loss": 0.0194, + "step": 132 + }, + { + "clip_ratio": 0.0008097242680378258, + "completion_length": 299.9285888671875, + "epoch": 0.11271186440677966, + "grad_norm": 0.2037296860020652, + "learning_rate": 2.2542372881355935e-05, + "loss": 0.0123, + "num_tokens": 1029577.0, + "reward": -0.4285714626312256, + "reward_std": 0.49777287244796753, + "rewards/check_winston_local_func/mean": -0.4285714328289032, + "rewards/check_winston_local_func/std": 0.9116845726966858, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 133 + }, + { + "clip_ratio": 0.010970565490424633, + "epoch": 0.1135593220338983, + "grad_norm": 0.15172018259613887, + "learning_rate": 2.2711864406779665e-05, + "loss": 0.0095, + "step": 134 + }, + { + "clip_ratio": 
0.027290966361761093, + "epoch": 0.11440677966101695, + "grad_norm": 0.14632003828933562, + "learning_rate": 2.2881355932203392e-05, + "loss": 0.0066, + "step": 135 + }, + { + "clip_ratio": 0.04884405434131622, + "epoch": 0.1152542372881356, + "grad_norm": 0.13010992493757564, + "learning_rate": 2.3050847457627122e-05, + "loss": 0.0037, + "step": 136 + }, + { + "clip_ratio": 0.00016204381245188415, + "completion_length": 397.9821472167969, + "epoch": 0.11610169491525424, + "grad_norm": 0.0819715923540025, + "learning_rate": 2.322033898305085e-05, + "loss": 0.0348, + "num_tokens": 1059368.0, + "reward": -0.7500000596046448, + "reward_std": 0.3499017357826233, + "rewards/check_winston_local_func/mean": -0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 137 + }, + { + "clip_ratio": 0.0012223825324326754, + "epoch": 0.11694915254237288, + "grad_norm": 0.07970324522981491, + "learning_rate": 2.338983050847458e-05, + "loss": 0.0336, + "step": 138 + }, + { + "clip_ratio": 0.015393489971756935, + "epoch": 0.11779661016949153, + "grad_norm": 0.07570693688371119, + "learning_rate": 2.3559322033898306e-05, + "loss": 0.0321, + "step": 139 + }, + { + "clip_ratio": 0.07253921031951904, + "epoch": 0.11864406779661017, + "grad_norm": 0.05800544884381334, + "learning_rate": 2.3728813559322036e-05, + "loss": 0.0305, + "step": 140 + }, + { + "clip_ratio": 0.00020609110652003437, + "completion_length": 376.3035888671875, + "epoch": 0.11949152542372882, + "grad_norm": 0.16488571125022886, + "learning_rate": 2.3898305084745763e-05, + "loss": -0.0156, + "num_tokens": 1088561.0, + "reward": -0.5, + "reward_std": 0.686587929725647, + "rewards/check_winston_local_func/mean": -0.5, + "rewards/check_winston_local_func/std": 0.8738628625869751, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 141 + }, + { + "clip_ratio": 0.020974619314074516, + "epoch": 0.12033898305084746, + "grad_norm": 0.12033253885509411, + "learning_rate": 2.406779661016949e-05, + "loss": -0.02, + "step": 142 + }, + { + "clip_ratio": 0.14757588505744934, + "epoch": 0.1211864406779661, + "grad_norm": 0.18906094003962706, + "learning_rate": 2.4237288135593224e-05, + "loss": -0.0215, + "step": 143 + }, + { + "clip_ratio": 0.18001240491867065, + "epoch": 0.12203389830508475, + "grad_norm": 0.2094330456679022, + "learning_rate": 2.4406779661016954e-05, + "loss": -0.0238, + "step": 144 + }, + { + "clip_ratio": 0.0010827317601069808, + "completion_length": 216.85714721679688, + "epoch": 0.1228813559322034, + "grad_norm": 0.22593574409537565, + "learning_rate": 2.457627118644068e-05, + "loss": -0.057, + "num_tokens": 1107713.0, + "reward": -0.0357142873108387, + "reward_std": 0.808063805103302, + "rewards/check_winston_local_func/mean": -0.0357142873108387, + "rewards/check_winston_local_func/std": 1.0084062814712524, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 145 + }, + { + "clip_ratio": 0.01685175858438015, + "epoch": 0.12372881355932204, + "grad_norm": 0.21920453847219976, + "learning_rate": 2.474576271186441e-05, + "loss": -0.0622, + "step": 146 + }, + { + "clip_ratio": 0.05698274075984955, + "epoch": 0.12457627118644068, + "grad_norm": 0.23790061749019706, + "learning_rate": 2.4915254237288138e-05, + "loss": -0.0672, + "step": 147 + }, + { + "clip_ratio": 0.06983836740255356, + "epoch": 0.12542372881355932, + "grad_norm": 0.19359662720887325, + "learning_rate": 2.5084745762711865e-05, + "loss": -0.0724, + "step": 148 + }, + { + 
"clip_ratio": 0.0013232758501544595, + "completion_length": 251.96429443359375, + "epoch": 0.12627118644067797, + "grad_norm": 0.27961740628458276, + "learning_rate": 2.5254237288135595e-05, + "loss": 0.06, + "num_tokens": 1129487.0, + "reward": -0.0357142873108387, + "reward_std": 0.9462584257125854, + "rewards/check_winston_local_func/mean": -0.0357142873108387, + "rewards/check_winston_local_func/std": 1.0084062814712524, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 149 + }, + { + "clip_ratio": 0.03364234417676926, + "epoch": 0.1271186440677966, + "grad_norm": 0.19276991014072303, + "learning_rate": 2.5423728813559322e-05, + "loss": 0.054, + "step": 150 + }, + { + "clip_ratio": 0.1430949568748474, + "epoch": 0.12796610169491526, + "grad_norm": 0.2768368269508983, + "learning_rate": 2.5593220338983052e-05, + "loss": 0.0518, + "step": 151 + }, + { + "clip_ratio": 0.16415317356586456, + "epoch": 0.1288135593220339, + "grad_norm": 0.25743304440606246, + "learning_rate": 2.576271186440678e-05, + "loss": 0.0475, + "step": 152 + }, + { + "clip_ratio": 0.0013469145633280277, + "completion_length": 204.48214721679688, + "epoch": 0.12966101694915255, + "grad_norm": 0.28188012404317475, + "learning_rate": 2.5932203389830512e-05, + "loss": 0.0527, + "num_tokens": 1148354.0, + "reward": 0.1428571492433548, + "reward_std": 0.7129831910133362, + "rewards/check_winston_local_func/mean": 0.1428571492433548, + "rewards/check_winston_local_func/std": 0.9987004995346069, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 153 + }, + { + "clip_ratio": 0.016695290803909302, + "epoch": 0.13050847457627118, + "grad_norm": 0.2641379759457116, + "learning_rate": 2.610169491525424e-05, + 
"loss": 0.0473, + "step": 154 + }, + { + "clip_ratio": 0.05237039551138878, + "epoch": 0.13135593220338984, + "grad_norm": 0.20691108630731772, + "learning_rate": 2.627118644067797e-05, + "loss": 0.0414, + "step": 155 + }, + { + "clip_ratio": 0.0867982804775238, + "epoch": 0.13220338983050847, + "grad_norm": 0.15341544674011254, + "learning_rate": 2.6440677966101696e-05, + "loss": 0.0351, + "step": 156 + }, + { + "clip_ratio": 0.0006545564392581582, + "completion_length": 233.9285888671875, + "epoch": 0.13305084745762713, + "grad_norm": 0.16036976523795443, + "learning_rate": 2.6610169491525427e-05, + "loss": 0.0179, + "num_tokens": 1168622.0, + "reward": 0.7142857313156128, + "reward_std": 0.4016071856021881, + "rewards/check_winston_local_func/mean": 0.7142857313156128, + "rewards/check_winston_local_func/std": 0.7061878442764282, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 157 + }, + { + "clip_ratio": 0.00993060227483511, + "epoch": 0.13389830508474576, + "grad_norm": 0.1298083776077636, + "learning_rate": 2.6779661016949153e-05, + "loss": 0.0151, + "step": 158 + }, + { + "clip_ratio": 0.0733163133263588, + "epoch": 0.13474576271186442, + "grad_norm": 0.11590218855503849, + "learning_rate": 2.6949152542372884e-05, + "loss": 0.0125, + "step": 159 + }, + { + "clip_ratio": 0.14935636520385742, + "epoch": 0.13559322033898305, + "grad_norm": 0.16154268567658825, + "learning_rate": 2.711864406779661e-05, + "loss": 0.011, + "step": 160 + }, + { + "clip_ratio": 0.0009650280699133873, + "completion_length": 174.7678680419922, + "epoch": 0.13644067796610168, + "grad_norm": 0.15404950919743313, + "learning_rate": 2.728813559322034e-05, + "loss": 0.0078, + "num_tokens": 1185697.0, + "reward": 0.8571429252624512, + "reward_std": 0.3342905640602112, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + 
"rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 161 + }, + { + "clip_ratio": 0.004791476763784885, + "epoch": 0.13728813559322034, + "grad_norm": 0.12682344230599282, + "learning_rate": 2.7457627118644068e-05, + "loss": 0.0056, + "step": 162 + }, + { + "clip_ratio": 0.023417560383677483, + "epoch": 0.13813559322033897, + "grad_norm": 0.0948693079603576, + "learning_rate": 2.76271186440678e-05, + "loss": 0.003, + "step": 163 + }, + { + "clip_ratio": 0.07911951839923859, + "epoch": 0.13898305084745763, + "grad_norm": 0.09089932231497586, + "learning_rate": 2.7796610169491528e-05, + "loss": 0.0007, + "step": 164 + }, + { + "clip_ratio": 0.000979878008365631, + "completion_length": 126.64286041259766, + "epoch": 0.13983050847457626, + "grad_norm": 0.1801163708005843, + "learning_rate": 2.7966101694915258e-05, + "loss": -0.0396, + "num_tokens": 1199565.0, + "reward": 0.7500000596046448, + "reward_std": 0.3859959840774536, + "rewards/check_winston_local_func/mean": 0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 165 + }, + { + "clip_ratio": 0.009913308545947075, + "epoch": 0.14067796610169492, + "grad_norm": 0.14588220837158195, + "learning_rate": 2.8135593220338985e-05, + "loss": -0.0428, + "step": 166 + }, + { + "clip_ratio": 0.07110879570245743, + "epoch": 0.14152542372881355, + "grad_norm": 0.276973278154756, + "learning_rate": 2.8305084745762715e-05, + "loss": -0.0441, + "step": 167 + }, + { + "clip_ratio": 0.06909574568271637, + "epoch": 0.1423728813559322, + "grad_norm": 0.12488402451050255, + "learning_rate": 2.8474576271186442e-05, + 
"loss": -0.0494, + "step": 168 + }, + { + "clip_ratio": 0.0003819709818344563, + "completion_length": 152.73214721679688, + "epoch": 0.14322033898305084, + "grad_norm": 0.3195642927880649, + "learning_rate": 2.8644067796610172e-05, + "loss": 0.0302, + "num_tokens": 1214790.0, + "reward": 0.7142857313156128, + "reward_std": 0.3342905342578888, + "rewards/check_winston_local_func/mean": 0.7142857313156128, + "rewards/check_winston_local_func/std": 0.7061878442764282, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 169 + }, + { + "clip_ratio": 0.015475978143513203, + "epoch": 0.1440677966101695, + "grad_norm": 0.2360393211362849, + "learning_rate": 2.88135593220339e-05, + "loss": 0.0228, + "step": 170 + }, + { + "clip_ratio": 0.08493895828723907, + "epoch": 0.14491525423728813, + "grad_norm": 0.17350104363138513, + "learning_rate": 2.8983050847457626e-05, + "loss": 0.0163, + "step": 171 + }, + { + "clip_ratio": 0.14768318831920624, + "epoch": 0.14576271186440679, + "grad_norm": 0.19569281232532856, + "learning_rate": 2.9152542372881356e-05, + "loss": 0.013, + "step": 172 + }, + { + "clip_ratio": 0.006150017958134413, + "completion_length": 186.25001525878906, + "epoch": 0.14661016949152542, + "grad_norm": 0.06068449124289285, + "learning_rate": 2.932203389830509e-05, + "loss": -0.0169, + "num_tokens": 1232564.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 173 + }, + { + "clip_ratio": 0.014521388337016106, + "epoch": 0.14745762711864407, + "grad_norm": 0.05994459441740582, + 
"learning_rate": 2.9491525423728817e-05, + "loss": -0.0174, + "step": 174 + }, + { + "clip_ratio": 0.04354570060968399, + "epoch": 0.1483050847457627, + "grad_norm": 0.06278027199945566, + "learning_rate": 2.9661016949152547e-05, + "loss": -0.0183, + "step": 175 + }, + { + "clip_ratio": 0.10504651814699173, + "epoch": 0.14915254237288136, + "grad_norm": 0.04416483226500781, + "learning_rate": 2.9830508474576274e-05, + "loss": -0.0193, + "step": 176 + }, + { + "clip_ratio": 0.003162125591188669, + "completion_length": 163.17857360839844, + "epoch": 0.15, + "grad_norm": 0.11359153510598317, + "learning_rate": 3.0000000000000004e-05, + "loss": -0.0335, + "num_tokens": 1248550.0, + "reward": 0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 177 + }, + { + "clip_ratio": 0.01111722644418478, + "epoch": 0.15084745762711865, + "grad_norm": 0.10416258904679447, + "learning_rate": 3.016949152542373e-05, + "loss": -0.0347, + "step": 178 + }, + { + "clip_ratio": 0.04117439687252045, + "epoch": 0.15169491525423728, + "grad_norm": 0.08204255975558637, + "learning_rate": 3.0338983050847458e-05, + "loss": -0.0364, + "step": 179 + }, + { + "clip_ratio": 0.08657827973365784, + "epoch": 0.15254237288135594, + "grad_norm": 0.08178448057500348, + "learning_rate": 3.0508474576271188e-05, + "loss": -0.038, + "step": 180 + }, + { + "clip_ratio": 0.010199248790740967, + "completion_length": 162.35714721679688, + "epoch": 0.15338983050847457, + "grad_norm": 0.4819050859019718, + "learning_rate": 3.067796610169492e-05, + "loss": 0.06, + "num_tokens": 1264994.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + 
"rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 181 + }, + { + "clip_ratio": 0.060436759144067764, + "epoch": 0.15423728813559323, + "grad_norm": 0.1959898799735129, + "learning_rate": 3.084745762711865e-05, + "loss": 0.0533, + "step": 182 + }, + { + "clip_ratio": 0.13463598489761353, + "epoch": 0.15508474576271186, + "grad_norm": 0.12678282333898375, + "learning_rate": 3.101694915254238e-05, + "loss": 0.0482, + "step": 183 + }, + { + "clip_ratio": 0.19176946580410004, + "epoch": 0.15593220338983052, + "grad_norm": 0.10756609820315277, + "learning_rate": 3.1186440677966106e-05, + "loss": 0.0463, + "step": 184 + }, + { + "clip_ratio": 0.0008241009199991822, + "completion_length": 237.60714721679688, + "epoch": 0.15677966101694915, + "grad_norm": 0.0, + "learning_rate": 3.135593220338983e-05, + "loss": 0.0, + "num_tokens": 1286164.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 185 + }, + { + "clip_ratio": 0.002994579030200839, + "epoch": 0.1576271186440678, + "grad_norm": 0.0, + "learning_rate": 3.152542372881356e-05, + "loss": 0.0, + "step": 186 + }, + { + "clip_ratio": 0.00574068445712328, + "epoch": 0.15847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.169491525423729e-05, + "loss": 0.0, + "step": 187 + }, + { + "clip_ratio": 0.012791804037988186, + "epoch": 0.15932203389830507, + "grad_norm": 0.0, + "learning_rate": 3.186440677966102e-05, + "loss": 0.0, + "step": 188 + }, + { + "clip_ratio": 
0.006764067802578211, + "completion_length": 143.94644165039062, + "epoch": 0.16016949152542373, + "grad_norm": 0.04704135237627796, + "learning_rate": 3.203389830508475e-05, + "loss": -0.0095, + "num_tokens": 1301409.0, + "reward": 0.9285714626312256, + "reward_std": 0.1322600245475769, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 189 + }, + { + "clip_ratio": 0.013139193877577782, + "epoch": 0.16101694915254236, + "grad_norm": 0.04532372769932697, + "learning_rate": 3.2203389830508473e-05, + "loss": -0.0098, + "step": 190 + }, + { + "clip_ratio": 0.03423069044947624, + "epoch": 0.16186440677966102, + "grad_norm": 0.040646403971755785, + "learning_rate": 3.237288135593221e-05, + "loss": -0.0105, + "step": 191 + }, + { + "clip_ratio": 0.06455554068088531, + "epoch": 0.16271186440677965, + "grad_norm": 0.03643001220928061, + "learning_rate": 3.2542372881355934e-05, + "loss": -0.0113, + "step": 192 + }, + { + "clip_ratio": 0.0007823093910701573, + "completion_length": 229.9107208251953, + "epoch": 0.1635593220338983, + "grad_norm": 0.0, + "learning_rate": 3.271186440677967e-05, + "loss": 0.0, + "num_tokens": 1321708.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 193 + }, + { + "clip_ratio": 0.0008988279732875526, + "epoch": 0.16440677966101694, + "grad_norm": 0.0, + "learning_rate": 3.2881355932203394e-05, + "loss": 0.0, + "step": 194 + }, + { + "clip_ratio": 0.003465626621618867, + "epoch": 
0.1652542372881356, + "grad_norm": 0.0, + "learning_rate": 3.305084745762712e-05, + "loss": 0.0, + "step": 195 + }, + { + "clip_ratio": 0.008655412122607231, + "epoch": 0.16610169491525423, + "grad_norm": 0.0, + "learning_rate": 3.322033898305085e-05, + "loss": 0.0, + "step": 196 + }, + { + "clip_ratio": 0.0021059864666312933, + "completion_length": 166.85714721679688, + "epoch": 0.1669491525423729, + "grad_norm": 0.17307734331449404, + "learning_rate": 3.338983050847458e-05, + "loss": -0.0036, + "num_tokens": 1338540.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 197 + }, + { + "clip_ratio": 0.009274979121983051, + "epoch": 0.16779661016949152, + "grad_norm": 0.10860266060182006, + "learning_rate": 3.355932203389831e-05, + "loss": -0.0066, + "step": 198 + }, + { + "clip_ratio": 0.03715561330318451, + "epoch": 0.16864406779661018, + "grad_norm": 0.09136703784102146, + "learning_rate": 3.3728813559322035e-05, + "loss": -0.008, + "step": 199 + }, + { + "clip_ratio": 0.06759678572416306, + "epoch": 0.1694915254237288, + "grad_norm": 0.08121070179066665, + "learning_rate": 3.389830508474576e-05, + "loss": -0.009, + "step": 200 + }, + { + "clip_ratio": 0.0002369106950936839, + "completion_length": 132.625, + "epoch": 0.17033898305084746, + "grad_norm": 0.12222790896016958, + "learning_rate": 3.406779661016949e-05, + "loss": 0.0931, + "num_tokens": 1352735.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + 
"rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 201 + }, + { + "clip_ratio": 0.0015077884308993816, + "epoch": 0.1711864406779661, + "grad_norm": 0.12028738542362348, + "learning_rate": 3.423728813559322e-05, + "loss": 0.0915, + "step": 202 + }, + { + "clip_ratio": 0.011990153230726719, + "epoch": 0.17203389830508475, + "grad_norm": 0.10639183565121645, + "learning_rate": 3.4406779661016956e-05, + "loss": 0.0873, + "step": 203 + }, + { + "clip_ratio": 0.05813857913017273, + "epoch": 0.17288135593220338, + "grad_norm": 0.08983262526351615, + "learning_rate": 3.457627118644068e-05, + "loss": 0.0833, + "step": 204 + }, + { + "clip_ratio": 0.001714512356556952, + "completion_length": 90.37500762939453, + "epoch": 0.17372881355932204, + "grad_norm": 0.10843637606790192, + "learning_rate": 3.474576271186441e-05, + "loss": 0.0164, + "num_tokens": 1365892.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 205 + }, + { + "clip_ratio": 0.017641481012105942, + "epoch": 0.17457627118644067, + "grad_norm": 0.08303991648667351, + "learning_rate": 3.491525423728814e-05, + "loss": 0.0148, + "step": 206 + }, + { + "clip_ratio": 0.15765391290187836, + "epoch": 0.17542372881355933, + "grad_norm": 0.07279655924549996, + "learning_rate": 3.5084745762711864e-05, + "loss": 0.0138, + "step": 207 + }, + { + "clip_ratio": 0.2804856598377228, + "epoch": 0.17627118644067796, + "grad_norm": 0.09315271598947107, + "learning_rate": 3.52542372881356e-05, + "loss": 0.0135, + "step": 208 + }, + { + "clip_ratio": 0.003159541869536042, + "completion_length": 
67.30357360839844, + "epoch": 0.17711864406779662, + "grad_norm": 0.0, + "learning_rate": 3.5423728813559324e-05, + "loss": 0.0, + "num_tokens": 1376973.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 209 + }, + { + "clip_ratio": 0.004362096078693867, + "epoch": 0.17796610169491525, + "grad_norm": 0.0, + "learning_rate": 3.559322033898305e-05, + "loss": 0.0, + "step": 210 + }, + { + "clip_ratio": 0.01770210638642311, + "epoch": 0.1788135593220339, + "grad_norm": 0.0, + "learning_rate": 3.576271186440678e-05, + "loss": 0.0, + "step": 211 + }, + { + "clip_ratio": 0.035751208662986755, + "epoch": 0.17966101694915254, + "grad_norm": 0.0, + "learning_rate": 3.593220338983051e-05, + "loss": 0.0, + "step": 212 + }, + { + "clip_ratio": 0.0035622839350253344, + "completion_length": 65.08928680419922, + "epoch": 0.1805084745762712, + "grad_norm": 0.0, + "learning_rate": 3.610169491525424e-05, + "loss": 0.0, + "num_tokens": 1387570.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 213 + }, + { + "clip_ratio": 0.0025523039512336254, + "epoch": 0.18135593220338983, + "grad_norm": 0.0, + "learning_rate": 3.627118644067797e-05, + "loss": 0.0, + "step": 214 + }, + { + "clip_ratio": 0.005835308227688074, + "epoch": 0.18220338983050846, + "grad_norm": 0.0, + "learning_rate": 3.64406779661017e-05, + "loss": 0.0, + "step": 215 + }, + { + "clip_ratio": 0.011904297396540642, + "epoch": 0.18305084745762712, + "grad_norm": 
0.0, + "learning_rate": 3.6610169491525426e-05, + "loss": 0.0, + "step": 216 + }, + { + "clip_ratio": 0.0003092146071139723, + "completion_length": 51.10714340209961, + "epoch": 0.18389830508474575, + "grad_norm": 0.356952256149441, + "learning_rate": 3.677966101694915e-05, + "loss": -0.0154, + "num_tokens": 1397320.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 217 + }, + { + "clip_ratio": 0.11051050573587418, + "epoch": 0.1847457627118644, + "grad_norm": 0.1542677635762948, + "learning_rate": 3.6949152542372886e-05, + "loss": -0.019, + "step": 218 + }, + { + "clip_ratio": 0.18382969498634338, + "epoch": 0.18559322033898304, + "grad_norm": 0.13966519767464722, + "learning_rate": 3.711864406779661e-05, + "loss": -0.0211, + "step": 219 + }, + { + "clip_ratio": 0.27009809017181396, + "epoch": 0.1864406779661017, + "grad_norm": 0.08132731257822706, + "learning_rate": 3.728813559322034e-05, + "loss": -0.0238, + "step": 220 + }, + { + "clip_ratio": 0.006028716918081045, + "completion_length": 83.26786041259766, + "epoch": 0.18728813559322033, + "grad_norm": 0.0, + "learning_rate": 3.745762711864407e-05, + "loss": 0.0, + "num_tokens": 1409935.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 221 + }, + { + "clip_ratio": 0.021660711616277695, + "epoch": 0.188135593220339, + "grad_norm": 0.0, + "learning_rate": 3.76271186440678e-05, + "loss": 0.0, 
+ "step": 222 + }, + { + "clip_ratio": 0.06699295341968536, + "epoch": 0.18898305084745762, + "grad_norm": 0.0, + "learning_rate": 3.779661016949153e-05, + "loss": 0.0, + "step": 223 + }, + { + "clip_ratio": 0.1347362995147705, + "epoch": 0.18983050847457628, + "grad_norm": 0.0, + "learning_rate": 3.796610169491526e-05, + "loss": 0.0, + "step": 224 + }, + { + "clip_ratio": 0.0006836191168986261, + "completion_length": 97.92857360839844, + "epoch": 0.1906779661016949, + "grad_norm": 0.05873233342660551, + "learning_rate": 3.813559322033899e-05, + "loss": 0.0388, + "num_tokens": 1422307.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 225 + }, + { + "clip_ratio": 0.0010315729305148125, + "epoch": 0.19152542372881357, + "grad_norm": 0.05937392738616397, + "learning_rate": 3.8305084745762714e-05, + "loss": 0.0386, + "step": 226 + }, + { + "clip_ratio": 0.01008252426981926, + "epoch": 0.1923728813559322, + "grad_norm": 0.0544038037479039, + "learning_rate": 3.847457627118644e-05, + "loss": 0.0371, + "step": 227 + }, + { + "clip_ratio": 0.0420512929558754, + "epoch": 0.19322033898305085, + "grad_norm": 0.047388133840752925, + "learning_rate": 3.8644067796610175e-05, + "loss": 0.0356, + "step": 228 + }, + { + "clip_ratio": 0.003829076187685132, + "completion_length": 48.48214340209961, + "epoch": 0.19406779661016949, + "grad_norm": 0.24338559301731436, + "learning_rate": 3.88135593220339e-05, + "loss": -0.0201, + "num_tokens": 1432182.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 229 + }, + { + "clip_ratio": 0.041657134890556335, + "epoch": 0.19491525423728814, + "grad_norm": 0.15635724094524717, + "learning_rate": 3.898305084745763e-05, + "loss": -0.026, + "step": 230 + }, + { + "clip_ratio": 0.16935327649116516, + "epoch": 0.19576271186440677, + "grad_norm": 0.11486942308015832, + "learning_rate": 3.9152542372881355e-05, + "loss": -0.0295, + "step": 231 + }, + { + "clip_ratio": 0.22958868741989136, + "epoch": 0.19661016949152543, + "grad_norm": 0.10892713241904037, + "learning_rate": 3.932203389830509e-05, + "loss": -0.0314, + "step": 232 + }, + { + "clip_ratio": 0.003605353645980358, + "completion_length": 66.10714721679688, + "epoch": 0.19745762711864406, + "grad_norm": 0.24973476558992524, + "learning_rate": 3.9491525423728816e-05, + "loss": -0.0118, + "num_tokens": 1443140.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 233 + }, + { + "clip_ratio": 0.029195427894592285, + "epoch": 0.19830508474576272, + "grad_norm": 0.1595699714332021, + "learning_rate": 3.966101694915255e-05, + "loss": -0.0189, + "step": 234 + }, + { + "clip_ratio": 0.10283487290143967, + "epoch": 0.19915254237288135, + "grad_norm": 0.11474727019285232, + "learning_rate": 3.9830508474576276e-05, + "loss": -0.0247, + "step": 235 + }, + { + "clip_ratio": 0.15862122178077698, + "epoch": 0.2, + "grad_norm": 0.10043744347803148, + "learning_rate": 4e-05, + "loss": -0.029, + "step": 236 + }, + { + "clip_ratio": 0.0013605443527922034, + 
"completion_length": 74.55357360839844, + "epoch": 0.20084745762711864, + "grad_norm": 0.13987954732136554, + "learning_rate": 3.9981167608286254e-05, + "loss": -0.0433, + "num_tokens": 1454515.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 237 + }, + { + "clip_ratio": 0.010904515162110329, + "epoch": 0.2016949152542373, + "grad_norm": 0.10644135758363778, + "learning_rate": 3.9962335216572505e-05, + "loss": -0.0464, + "step": 238 + }, + { + "clip_ratio": 0.05173995718359947, + "epoch": 0.20254237288135593, + "grad_norm": 0.08987160994189367, + "learning_rate": 3.994350282485876e-05, + "loss": -0.0494, + "step": 239 + }, + { + "clip_ratio": 0.10260221362113953, + "epoch": 0.2033898305084746, + "grad_norm": 0.07087528775663905, + "learning_rate": 3.9924670433145014e-05, + "loss": -0.0523, + "step": 240 + }, + { + "clip_ratio": 0.0006479613948613405, + "completion_length": 73.28572082519531, + "epoch": 0.20423728813559322, + "grad_norm": 0.3445626306759668, + "learning_rate": 3.9905838041431265e-05, + "loss": 0.0365, + "num_tokens": 1466203.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 241 + }, + { + "clip_ratio": 0.061018019914627075, + "epoch": 0.20508474576271185, + "grad_norm": 0.17586034661375718, + "learning_rate": 3.9887005649717516e-05, + "loss": 0.0285, + "step": 242 + 
}, + { + "clip_ratio": 0.21443673968315125, + "epoch": 0.2059322033898305, + "grad_norm": 0.16852265377216533, + "learning_rate": 3.986817325800377e-05, + "loss": 0.0245, + "step": 243 + }, + { + "clip_ratio": 0.3032749891281128, + "epoch": 0.20677966101694914, + "grad_norm": 0.16538030606379006, + "learning_rate": 3.984934086629002e-05, + "loss": 0.0219, + "step": 244 + }, + { + "clip_ratio": 0.001159251551143825, + "completion_length": 80.375, + "epoch": 0.2076271186440678, + "grad_norm": 0.17416776142167675, + "learning_rate": 3.9830508474576276e-05, + "loss": -0.0293, + "num_tokens": 1477712.0, + "reward": 0.8928571939468384, + "reward_std": 0.147871196269989, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 245 + }, + { + "clip_ratio": 0.018606197088956833, + "epoch": 0.20847457627118643, + "grad_norm": 0.13288705840160372, + "learning_rate": 3.981167608286253e-05, + "loss": -0.0342, + "step": 246 + }, + { + "clip_ratio": 0.07409250736236572, + "epoch": 0.2093220338983051, + "grad_norm": 0.11062194988477761, + "learning_rate": 3.979284369114878e-05, + "loss": -0.0387, + "step": 247 + }, + { + "clip_ratio": 0.13684464991092682, + "epoch": 0.21016949152542372, + "grad_norm": 0.09630587836377022, + "learning_rate": 3.9774011299435036e-05, + "loss": -0.0423, + "step": 248 + }, + { + "clip_ratio": 0.0009059179574251175, + "completion_length": 55.892860412597656, + "epoch": 0.21101694915254238, + "grad_norm": 0.30277389882138056, + "learning_rate": 3.975517890772128e-05, + "loss": -0.0232, + "num_tokens": 1489946.0, + "reward": 0.8214285969734192, + "reward_std": 0.2801312208175659, + "rewards/check_winston_local_func/mean": 0.8214285969734192, + "rewards/check_winston_local_func/std": 
0.5754727125167847, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 249 + }, + { + "clip_ratio": 0.05273974686861038, + "epoch": 0.211864406779661, + "grad_norm": 0.20551737732816863, + "learning_rate": 3.973634651600754e-05, + "loss": -0.0352, + "step": 250 + }, + { + "clip_ratio": 0.15495876967906952, + "epoch": 0.21271186440677967, + "grad_norm": 0.23192855972985502, + "learning_rate": 3.971751412429379e-05, + "loss": -0.0428, + "step": 251 + }, + { + "clip_ratio": 0.17651182413101196, + "epoch": 0.2135593220338983, + "grad_norm": 0.153802982923592, + "learning_rate": 3.969868173258004e-05, + "loss": -0.0503, + "step": 252 + }, + { + "clip_ratio": 0.001365313190035522, + "completion_length": 74.5, + "epoch": 0.21440677966101696, + "grad_norm": 0.1294886128912843, + "learning_rate": 3.967984934086629e-05, + "loss": -0.0165, + "num_tokens": 1501150.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 253 + }, + { + "clip_ratio": 0.029109954833984375, + "epoch": 0.21525423728813559, + "grad_norm": 0.07796443960667383, + "learning_rate": 3.966101694915255e-05, + "loss": -0.0187, + "step": 254 + }, + { + "clip_ratio": 0.1373310536146164, + "epoch": 0.21610169491525424, + "grad_norm": 0.0719203190228422, + "learning_rate": 3.9642184557438794e-05, + "loss": -0.0211, + "step": 255 + }, + { + "clip_ratio": 0.24434244632720947, + "epoch": 0.21694915254237288, + "grad_norm": 0.0766668656235949, + "learning_rate": 3.962335216572505e-05, + "loss": -0.023, + "step": 256 + }, + { + 
"clip_ratio": 0.0014079277170822024, + "completion_length": 76.76786041259766, + "epoch": 0.21779661016949153, + "grad_norm": 0.14896557612902658, + "learning_rate": 3.96045197740113e-05, + "loss": -0.048, + "num_tokens": 1513457.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 257 + }, + { + "clip_ratio": 0.008702627383172512, + "epoch": 0.21864406779661016, + "grad_norm": 0.12790944068712845, + "learning_rate": 3.9585687382297554e-05, + "loss": -0.0515, + "step": 258 + }, + { + "clip_ratio": 0.05537901073694229, + "epoch": 0.21949152542372882, + "grad_norm": 0.0901106115692995, + "learning_rate": 3.956685499058381e-05, + "loss": -0.0559, + "step": 259 + }, + { + "clip_ratio": 0.12627661228179932, + "epoch": 0.22033898305084745, + "grad_norm": 0.08842019141814955, + "learning_rate": 3.954802259887006e-05, + "loss": -0.0589, + "step": 260 + }, + { + "clip_ratio": 0.001680672401562333, + "completion_length": 68.46428680419922, + "epoch": 0.2211864406779661, + "grad_norm": 0.45455295411779023, + "learning_rate": 3.9529190207156314e-05, + "loss": 0.0253, + "num_tokens": 1523675.0, + "reward": 0.785714328289032, + "reward_std": 0.35475122928619385, + "rewards/check_winston_local_func/mean": 0.7857142686843872, + "rewards/check_winston_local_func/std": 0.6241878271102905, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 261 + }, + { + "clip_ratio": 0.08457090705633163, + "epoch": 0.22203389830508474, + "grad_norm": 0.2750192097608069, + "learning_rate": 3.9510357815442565e-05, + 
"loss": 0.0122, + "step": 262 + }, + { + "clip_ratio": 0.14618617296218872, + "epoch": 0.2228813559322034, + "grad_norm": 0.21776056884947845, + "learning_rate": 3.9491525423728816e-05, + "loss": 0.0035, + "step": 263 + }, + { + "clip_ratio": 0.16797243058681488, + "epoch": 0.22372881355932203, + "grad_norm": 0.1552963639704198, + "learning_rate": 3.947269303201507e-05, + "loss": -0.0027, + "step": 264 + }, + { + "clip_ratio": 0.0041149333119392395, + "completion_length": 92.05357360839844, + "epoch": 0.2245762711864407, + "grad_norm": 0.0, + "learning_rate": 3.9453860640301325e-05, + "loss": 0.0, + "num_tokens": 1537926.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 265 + }, + { + "clip_ratio": 0.015582824125885963, + "epoch": 0.22542372881355932, + "grad_norm": 0.0, + "learning_rate": 3.943502824858757e-05, + "loss": 0.0, + "step": 266 + }, + { + "clip_ratio": 0.05549276992678642, + "epoch": 0.22627118644067798, + "grad_norm": 0.0, + "learning_rate": 3.941619585687383e-05, + "loss": 0.0, + "step": 267 + }, + { + "clip_ratio": 0.10348478704690933, + "epoch": 0.2271186440677966, + "grad_norm": 0.0, + "learning_rate": 3.939736346516008e-05, + "loss": 0.0, + "step": 268 + }, + { + "clip_ratio": 0.0024110758677124977, + "completion_length": 132.5357208251953, + "epoch": 0.22796610169491524, + "grad_norm": 0.09866009376925343, + "learning_rate": 3.937853107344633e-05, + "loss": -0.0252, + "num_tokens": 1552892.0, + "reward": 0.8928571939468384, + "reward_std": 0.30304574966430664, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + 
"rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 269 + }, + { + "clip_ratio": 0.009020349942147732, + "epoch": 0.2288135593220339, + "grad_norm": 0.08611769992817986, + "learning_rate": 3.935969868173259e-05, + "loss": -0.0267, + "step": 270 + }, + { + "clip_ratio": 0.03130246326327324, + "epoch": 0.22966101694915253, + "grad_norm": 0.08283957691220468, + "learning_rate": 3.934086629001884e-05, + "loss": -0.0288, + "step": 271 + }, + { + "clip_ratio": 0.06420135498046875, + "epoch": 0.2305084745762712, + "grad_norm": 0.07319871503015539, + "learning_rate": 3.932203389830509e-05, + "loss": -0.0314, + "step": 272 + }, + { + "clip_ratio": 0.0030047716572880745, + "completion_length": 85.9464340209961, + "epoch": 0.23135593220338982, + "grad_norm": 0.42479217252605955, + "learning_rate": 3.930320150659134e-05, + "loss": 0.0151, + "num_tokens": 1564737.0, + "reward": 0.8571429252624512, + "reward_std": 0.3342905342578888, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 273 + }, + { + "clip_ratio": 0.046567775309085846, + "epoch": 0.23220338983050848, + "grad_norm": 0.25875493543994865, + "learning_rate": 3.928436911487759e-05, + "loss": 0.0039, + "step": 274 + }, + { + "clip_ratio": 0.11683137714862823, + "epoch": 0.2330508474576271, + "grad_norm": 0.15568587648106266, + "learning_rate": 3.926553672316384e-05, + "loss": -0.0039, + "step": 275 + }, + { + "clip_ratio": 0.1598564237356186, + "epoch": 0.23389830508474577, + "grad_norm": 0.12141989924883649, + "learning_rate": 3.92467043314501e-05, + "loss": -0.0076, + "step": 276 + }, + { + "clip_ratio": 0.001374253653921187, + "completion_length": 
122.96429443359375, + "epoch": 0.2347457627118644, + "grad_norm": 0.18782615643703865, + "learning_rate": 3.922787193973635e-05, + "loss": -0.026, + "num_tokens": 1579727.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 277 + }, + { + "clip_ratio": 0.017132315784692764, + "epoch": 0.23559322033898306, + "grad_norm": 0.12401604739808049, + "learning_rate": 3.92090395480226e-05, + "loss": -0.031, + "step": 278 + }, + { + "clip_ratio": 0.05705662816762924, + "epoch": 0.2364406779661017, + "grad_norm": 0.0762353013620226, + "learning_rate": 3.919020715630885e-05, + "loss": -0.034, + "step": 279 + }, + { + "clip_ratio": 0.09824671596288681, + "epoch": 0.23728813559322035, + "grad_norm": 0.07414316824181627, + "learning_rate": 3.9171374764595104e-05, + "loss": -0.0361, + "step": 280 + }, + { + "clip_ratio": 0.006891847122460604, + "completion_length": 111.14286041259766, + "epoch": 0.23813559322033898, + "grad_norm": 0.44887370177976565, + "learning_rate": 3.9152542372881355e-05, + "loss": -0.0147, + "num_tokens": 1592855.0, + "reward": 0.7500000596046448, + "reward_std": 0.637336254119873, + "rewards/check_winston_local_func/mean": 0.75, + "rewards/check_winston_local_func/std": 0.6674237847328186, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 281 + }, + { + "clip_ratio": 0.0862836092710495, + "epoch": 0.23898305084745763, + "grad_norm": 0.2905997622704694, + "learning_rate": 3.913370998116761e-05, + "loss": -0.0255, + "step": 282 + }, + { + "clip_ratio": 0.13213881850242615, 
+ "epoch": 0.23983050847457626, + "grad_norm": 0.20046065755709935, + "learning_rate": 3.9114877589453864e-05, + "loss": -0.036, + "step": 283 + }, + { + "clip_ratio": 0.20514217019081116, + "epoch": 0.24067796610169492, + "grad_norm": 0.17822083347245274, + "learning_rate": 3.9096045197740115e-05, + "loss": -0.0417, + "step": 284 + }, + { + "clip_ratio": 0.0007382220355793834, + "completion_length": 90.14286041259766, + "epoch": 0.24152542372881355, + "grad_norm": 0.05844943977633127, + "learning_rate": 3.907721280602637e-05, + "loss": -0.0145, + "num_tokens": 1605287.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 285 + }, + { + "clip_ratio": 0.006259975954890251, + "epoch": 0.2423728813559322, + "grad_norm": 0.04883518588447698, + "learning_rate": 3.905838041431262e-05, + "loss": -0.015, + "step": 286 + }, + { + "clip_ratio": 0.023042459040880203, + "epoch": 0.24322033898305084, + "grad_norm": 0.04225419018938037, + "learning_rate": 3.9039548022598875e-05, + "loss": -0.0156, + "step": 287 + }, + { + "clip_ratio": 0.04386242851614952, + "epoch": 0.2440677966101695, + "grad_norm": 0.03866602121110847, + "learning_rate": 3.9020715630885127e-05, + "loss": -0.0163, + "step": 288 + }, + { + "clip_ratio": 0.0029555519577115774, + "completion_length": 144.98214721679688, + "epoch": 0.24491525423728813, + "grad_norm": 0.1262690850004453, + "learning_rate": 3.900188323917138e-05, + "loss": -0.0343, + "num_tokens": 1620742.0, + "reward": 0.8571429252624512, + "reward_std": 0.4040610194206238, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + 
"rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 289 + }, + { + "clip_ratio": 0.0132750254124403, + "epoch": 0.2457627118644068, + "grad_norm": 0.10191416605166659, + "learning_rate": 3.898305084745763e-05, + "loss": -0.0377, + "step": 290 + }, + { + "clip_ratio": 0.043000176548957825, + "epoch": 0.24661016949152542, + "grad_norm": 0.08414775760035112, + "learning_rate": 3.896421845574388e-05, + "loss": -0.0405, + "step": 291 + }, + { + "clip_ratio": 0.07677298784255981, + "epoch": 0.24745762711864408, + "grad_norm": 0.07673330413564883, + "learning_rate": 3.894538606403013e-05, + "loss": -0.0436, + "step": 292 + }, + { + "clip_ratio": 0.0007677033427171409, + "completion_length": 314.64288330078125, + "epoch": 0.2483050847457627, + "grad_norm": 0.05727067166697756, + "learning_rate": 3.892655367231639e-05, + "loss": 0.0061, + "num_tokens": 1645538.0, + "reward": 0.8571429252624512, + "reward_std": 0.15272071957588196, + "rewards/check_winston_local_func/mean": 0.8571428656578064, + "rewards/check_winston_local_func/std": 0.5197402238845825, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 293 + }, + { + "clip_ratio": 0.000989561784081161, + "epoch": 0.24915254237288137, + "grad_norm": 0.05658381403070837, + "learning_rate": 3.890772128060264e-05, + "loss": 0.0055, + "step": 294 + }, + { + "clip_ratio": 0.002105026040226221, + "epoch": 0.25, + "grad_norm": 0.05358318750720369, + "learning_rate": 3.888888888888889e-05, + "loss": 0.0045, + "step": 295 + }, + { + "clip_ratio": 0.008737047202885151, + "epoch": 0.25084745762711863, + "grad_norm": 0.04703537375755522, + "learning_rate": 3.887005649717515e-05, + "loss": 0.003, + "step": 296 + }, + { + "clip_ratio": 
0.001839210744947195, + "completion_length": 134.08929443359375, + "epoch": 0.25169491525423726, + "grad_norm": 0.13649134617830305, + "learning_rate": 3.885122410546139e-05, + "loss": -0.0194, + "num_tokens": 1660599.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 297 + }, + { + "clip_ratio": 0.009821072220802307, + "epoch": 0.25254237288135595, + "grad_norm": 0.0691252012642643, + "learning_rate": 3.883239171374765e-05, + "loss": -0.0207, + "step": 298 + }, + { + "clip_ratio": 0.027830438688397408, + "epoch": 0.2533898305084746, + "grad_norm": 0.04974246695392892, + "learning_rate": 3.88135593220339e-05, + "loss": -0.0215, + "step": 299 + }, + { + "clip_ratio": 0.05817332863807678, + "epoch": 0.2542372881355932, + "grad_norm": 0.04524622630022836, + "learning_rate": 3.879472693032015e-05, + "loss": -0.022, + "step": 300 + }, + { + "clip_ratio": 0.001304431352764368, + "completion_length": 191.7678680419922, + "epoch": 0.25508474576271184, + "grad_norm": 0.07787541743964174, + "learning_rate": 3.8775894538606404e-05, + "loss": -0.0228, + "num_tokens": 1680114.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 301 + }, + { + "clip_ratio": 0.004079771228134632, + "epoch": 0.2559322033898305, + "grad_norm": 0.05792631765695611, + "learning_rate": 3.875706214689266e-05, + "loss": 
-0.0237, + "step": 302 + }, + { + "clip_ratio": 0.01483174879103899, + "epoch": 0.25677966101694916, + "grad_norm": 0.04458591657155666, + "learning_rate": 3.8738229755178906e-05, + "loss": -0.0243, + "step": 303 + }, + { + "clip_ratio": 0.03137718886137009, + "epoch": 0.2576271186440678, + "grad_norm": 0.03880399913720864, + "learning_rate": 3.8719397363465164e-05, + "loss": -0.0248, + "step": 304 + }, + { + "clip_ratio": 0.0008109563495963812, + "completion_length": 160.80357360839844, + "epoch": 0.2584745762711864, + "grad_norm": 0.20027349236527825, + "learning_rate": 3.8700564971751415e-05, + "loss": -0.0224, + "num_tokens": 1697895.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 305 + }, + { + "clip_ratio": 0.0038879578933119774, + "epoch": 0.2593220338983051, + "grad_norm": 0.07886223922966976, + "learning_rate": 3.8681732580037666e-05, + "loss": -0.0235, + "step": 306 + }, + { + "clip_ratio": 0.014749433845281601, + "epoch": 0.26016949152542374, + "grad_norm": 0.06786394961277091, + "learning_rate": 3.8662900188323924e-05, + "loss": -0.0247, + "step": 307 + }, + { + "clip_ratio": 0.03604491055011749, + "epoch": 0.26101694915254237, + "grad_norm": 0.041254536906425165, + "learning_rate": 3.8644067796610175e-05, + "loss": -0.0257, + "step": 308 + }, + { + "clip_ratio": 0.000806977681349963, + "completion_length": 233.62501525878906, + "epoch": 0.261864406779661, + "grad_norm": 0.08944516343119709, + "learning_rate": 3.8625235404896426e-05, + "loss": -0.0013, + "num_tokens": 1719706.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + 
"rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 309 + }, + { + "clip_ratio": 0.0043969168327748775, + "epoch": 0.2627118644067797, + "grad_norm": 0.08008566583785545, + "learning_rate": 3.860640301318268e-05, + "loss": -0.0023, + "step": 310 + }, + { + "clip_ratio": 0.016115259379148483, + "epoch": 0.2635593220338983, + "grad_norm": 0.06746261744912278, + "learning_rate": 3.858757062146893e-05, + "loss": -0.0044, + "step": 311 + }, + { + "clip_ratio": 0.029712393879890442, + "epoch": 0.26440677966101694, + "grad_norm": 0.05954201685230599, + "learning_rate": 3.856873822975518e-05, + "loss": -0.0066, + "step": 312 + }, + { + "clip_ratio": 0.0019188802689313889, + "completion_length": 118.87500762939453, + "epoch": 0.2652542372881356, + "grad_norm": 0.2459608147132861, + "learning_rate": 3.854990583804144e-05, + "loss": 0.0138, + "num_tokens": 1733891.0, + "reward": 0.6785714626312256, + "reward_std": 0.39675766229629517, + "rewards/check_winston_local_func/mean": 0.6785714030265808, + "rewards/check_winston_local_func/std": 0.7411819696426392, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 313 + }, + { + "clip_ratio": 0.011992106214165688, + "epoch": 0.26610169491525426, + "grad_norm": 0.1848848068319577, + "learning_rate": 3.853107344632769e-05, + "loss": 0.0079, + "step": 314 + }, + { + "clip_ratio": 0.05709777772426605, + "epoch": 0.2669491525423729, + "grad_norm": 0.146233205223168, + "learning_rate": 3.851224105461394e-05, + "loss": 0.0011, + "step": 315 + }, + { + "clip_ratio": 0.10807797312736511, + "epoch": 0.2677966101694915, + "grad_norm": 0.16941844805388373, + "learning_rate": 
3.849340866290019e-05, + "loss": -0.0039, + "step": 316 + }, + { + "clip_ratio": 0.0004728636995423585, + "completion_length": 202.80357360839844, + "epoch": 0.26864406779661015, + "grad_norm": 0.0, + "learning_rate": 3.847457627118644e-05, + "loss": 0.0, + "num_tokens": 1753696.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 317 + }, + { + "clip_ratio": 0.0017543105641379952, + "epoch": 0.26949152542372884, + "grad_norm": 0.0, + "learning_rate": 3.84557438794727e-05, + "loss": 0.0, + "step": 318 + }, + { + "clip_ratio": 0.0028700276743620634, + "epoch": 0.27033898305084747, + "grad_norm": 0.0, + "learning_rate": 3.843691148775895e-05, + "loss": 0.0, + "step": 319 + }, + { + "clip_ratio": 0.005041220691055059, + "epoch": 0.2711864406779661, + "grad_norm": 0.0, + "learning_rate": 3.84180790960452e-05, + "loss": 0.0, + "step": 320 + }, + { + "clip_ratio": 0.0006087662768550217, + "completion_length": 246.4285888671875, + "epoch": 0.27203389830508473, + "grad_norm": 0.02696783441349338, + "learning_rate": 3.839924670433145e-05, + "loss": -0.0045, + "num_tokens": 1776128.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 321 + }, + { + "clip_ratio": 0.0007834911812096834, + "epoch": 0.27288135593220336, + "grad_norm": 0.026934160082675403, + "learning_rate": 3.8380414312617703e-05, + "loss": -0.0046, + "step": 322 + }, + { + "clip_ratio": 
0.0016883478965610266, + "epoch": 0.27372881355932205, + "grad_norm": 0.026427549336641994, + "learning_rate": 3.8361581920903955e-05, + "loss": -0.0047, + "step": 323 + }, + { + "clip_ratio": 0.004345850553363562, + "epoch": 0.2745762711864407, + "grad_norm": 0.026125606288862162, + "learning_rate": 3.834274952919021e-05, + "loss": -0.0052, + "step": 324 + }, + { + "clip_ratio": 0.0007442247588187456, + "completion_length": 262.21429443359375, + "epoch": 0.2754237288135593, + "grad_norm": 0.0, + "learning_rate": 3.8323917137476463e-05, + "loss": 0.0, + "num_tokens": 1797244.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 325 + }, + { + "clip_ratio": 0.000711060652974993, + "epoch": 0.27627118644067794, + "grad_norm": 0.0, + "learning_rate": 3.8305084745762714e-05, + "loss": 0.0, + "step": 326 + }, + { + "clip_ratio": 0.0014260082971304655, + "epoch": 0.2771186440677966, + "grad_norm": 0.0, + "learning_rate": 3.828625235404897e-05, + "loss": 0.0, + "step": 327 + }, + { + "clip_ratio": 0.001994561171159148, + "epoch": 0.27796610169491526, + "grad_norm": 0.0, + "learning_rate": 3.826741996233522e-05, + "loss": 0.0, + "step": 328 + }, + { + "clip_ratio": 0.001084706513211131, + "completion_length": 172.08929443359375, + "epoch": 0.2788135593220339, + "grad_norm": 0.0876294357565793, + "learning_rate": 3.8248587570621474e-05, + "loss": 0.0001, + "num_tokens": 1813569.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 329 + }, + { + "clip_ratio": 0.002805770607665181, + "epoch": 0.2796610169491525, + "grad_norm": 0.08561321931994853, + "learning_rate": 3.8229755178907726e-05, + "loss": -0.001, + "step": 330 + }, + { + "clip_ratio": 0.015290237963199615, + "epoch": 0.2805084745762712, + "grad_norm": 0.05204164151508037, + "learning_rate": 3.8210922787193977e-05, + "loss": -0.0022, + "step": 331 + }, + { + "clip_ratio": 0.02808833308517933, + "epoch": 0.28135593220338984, + "grad_norm": 0.047916681538162136, + "learning_rate": 3.819209039548023e-05, + "loss": -0.0028, + "step": 332 + }, + { + "clip_ratio": 0.00172739801928401, + "completion_length": 148.0178680419922, + "epoch": 0.28220338983050847, + "grad_norm": 0.0, + "learning_rate": 3.8173258003766486e-05, + "loss": 0.0, + "num_tokens": 1829578.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 333 + }, + { + "clip_ratio": 0.006786983925849199, + "epoch": 0.2830508474576271, + "grad_norm": 0.0, + "learning_rate": 3.815442561205273e-05, + "loss": 0.0, + "step": 334 + }, + { + "clip_ratio": 0.018832042813301086, + "epoch": 0.2838983050847458, + "grad_norm": 0.0, + "learning_rate": 3.813559322033899e-05, + "loss": 0.0, + "step": 335 + }, + { + "clip_ratio": 0.03414842113852501, + "epoch": 0.2847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.811676082862524e-05, + "loss": 0.0, + "step": 336 + }, + { + "clip_ratio": 0.0009491202072240412, + "completion_length": 218.2678680419922, + "epoch": 0.28559322033898304, + "grad_norm": 0.0, + "learning_rate": 3.809792843691149e-05, + "loss": 0.0, + "num_tokens": 1849113.0, + "reward": 1.0, + 
"reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 337 + }, + { + "clip_ratio": 0.0021562932524830103, + "epoch": 0.2864406779661017, + "grad_norm": 0.0, + "learning_rate": 3.807909604519775e-05, + "loss": 0.0, + "step": 338 + }, + { + "clip_ratio": 0.0030712890438735485, + "epoch": 0.28728813559322036, + "grad_norm": 0.0, + "learning_rate": 3.8060263653484e-05, + "loss": 0.0, + "step": 339 + }, + { + "clip_ratio": 0.0054122223518788815, + "epoch": 0.288135593220339, + "grad_norm": 0.0, + "learning_rate": 3.804143126177025e-05, + "loss": 0.0, + "step": 340 + }, + { + "clip_ratio": 0.00150212156586349, + "completion_length": 244.96429443359375, + "epoch": 0.2889830508474576, + "grad_norm": 0.0, + "learning_rate": 3.80225988700565e-05, + "loss": 0.0, + "num_tokens": 1869743.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 341 + }, + { + "clip_ratio": 0.0009405449964106083, + "epoch": 0.28983050847457625, + "grad_norm": 0.0, + "learning_rate": 3.800376647834275e-05, + "loss": 0.0, + "step": 342 + }, + { + "clip_ratio": 0.0021599442698061466, + "epoch": 0.29067796610169494, + "grad_norm": 0.0, + "learning_rate": 3.7984934086629e-05, + "loss": 0.0, + "step": 343 + }, + { + "clip_ratio": 0.002221164293587208, + "epoch": 0.29152542372881357, + "grad_norm": 0.0, + "learning_rate": 3.796610169491526e-05, + "loss": 0.0, + "step": 344 + }, + { + "clip_ratio": 0.0013717131223529577, + "completion_length": 223.3035888671875, + "epoch": 
0.2923728813559322, + "grad_norm": 0.12633963981780688, + "learning_rate": 3.7947269303201505e-05, + "loss": 0.0043, + "num_tokens": 1889048.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 345 + }, + { + "clip_ratio": 0.004998536314815283, + "epoch": 0.29322033898305083, + "grad_norm": 0.09918328474458112, + "learning_rate": 3.792843691148776e-05, + "loss": 0.0014, + "step": 346 + }, + { + "clip_ratio": 0.013239889405667782, + "epoch": 0.2940677966101695, + "grad_norm": 0.07460521909248337, + "learning_rate": 3.7909604519774014e-05, + "loss": -0.0005, + "step": 347 + }, + { + "clip_ratio": 0.033433884382247925, + "epoch": 0.29491525423728815, + "grad_norm": 0.06058703312441606, + "learning_rate": 3.7890772128060265e-05, + "loss": -0.002, + "step": 348 + }, + { + "clip_ratio": 0.003388527315109968, + "completion_length": 246.87501525878906, + "epoch": 0.2957627118644068, + "grad_norm": 0.0, + "learning_rate": 3.787193973634652e-05, + "loss": 0.0, + "num_tokens": 1909737.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 349 + }, + { + "clip_ratio": 0.003924295771867037, + "epoch": 0.2966101694915254, + "grad_norm": 0.0, + "learning_rate": 3.7853107344632774e-05, + "loss": 0.0, + "step": 350 + }, + { + "clip_ratio": 0.004607200622558594, + "epoch": 0.29745762711864404, + "grad_norm": 0.0, + "learning_rate": 3.7834274952919025e-05, + "loss": 
0.0, + "step": 351 + }, + { + "clip_ratio": 0.007875598035752773, + "epoch": 0.2983050847457627, + "grad_norm": 0.0, + "learning_rate": 3.7815442561205276e-05, + "loss": 0.0, + "step": 352 + }, + { + "clip_ratio": 0.0009100620518438518, + "completion_length": 221.5178680419922, + "epoch": 0.29915254237288136, + "grad_norm": 0.0, + "learning_rate": 3.779661016949153e-05, + "loss": 0.0, + "num_tokens": 1930142.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 353 + }, + { + "clip_ratio": 0.002130310283973813, + "epoch": 0.3, + "grad_norm": 0.0, + "learning_rate": 3.777777777777778e-05, + "loss": 0.0, + "step": 354 + }, + { + "clip_ratio": 0.005219562910497189, + "epoch": 0.3008474576271186, + "grad_norm": 0.0, + "learning_rate": 3.7758945386064036e-05, + "loss": 0.0, + "step": 355 + }, + { + "clip_ratio": 0.007768368814140558, + "epoch": 0.3016949152542373, + "grad_norm": 0.0, + "learning_rate": 3.774011299435029e-05, + "loss": 0.0, + "step": 356 + }, + { + "clip_ratio": 0.002148033818230033, + "completion_length": 170.1607208251953, + "epoch": 0.30254237288135594, + "grad_norm": 0.15185128372630483, + "learning_rate": 3.772128060263654e-05, + "loss": -0.0055, + "num_tokens": 1947871.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 357 + }, + { + "clip_ratio": 0.0094491271302104, + "epoch": 0.30338983050847457, + "grad_norm": 
0.08593602543887907, + "learning_rate": 3.770244821092279e-05, + "loss": -0.0077, + "step": 358 + }, + { + "clip_ratio": 0.06222861260175705, + "epoch": 0.3042372881355932, + "grad_norm": 0.06218840086209775, + "learning_rate": 3.768361581920904e-05, + "loss": -0.0087, + "step": 359 + }, + { + "clip_ratio": 0.10356654226779938, + "epoch": 0.3050847457627119, + "grad_norm": 0.07005653665235588, + "learning_rate": 3.766478342749529e-05, + "loss": -0.0091, + "step": 360 + }, + { + "clip_ratio": 0.0011975433444604278, + "completion_length": 213.58929443359375, + "epoch": 0.3059322033898305, + "grad_norm": 0.0, + "learning_rate": 3.764595103578155e-05, + "loss": 0.0, + "num_tokens": 1967888.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 361 + }, + { + "clip_ratio": 0.0017720028990879655, + "epoch": 0.30677966101694915, + "grad_norm": 0.0, + "learning_rate": 3.76271186440678e-05, + "loss": 0.0, + "step": 362 + }, + { + "clip_ratio": 0.002977039897814393, + "epoch": 0.3076271186440678, + "grad_norm": 0.0, + "learning_rate": 3.760828625235405e-05, + "loss": 0.0, + "step": 363 + }, + { + "clip_ratio": 0.0051023694686591625, + "epoch": 0.30847457627118646, + "grad_norm": 0.0, + "learning_rate": 3.758945386064031e-05, + "loss": 0.0, + "step": 364 + }, + { + "clip_ratio": 0.00277900043874979, + "completion_length": 175.7857208251953, + "epoch": 0.3093220338983051, + "grad_norm": 0.0, + "learning_rate": 3.7570621468926554e-05, + "loss": 0.0, + "num_tokens": 1985452.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + 
"rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 365 + }, + { + "clip_ratio": 0.0037012884858995676, + "epoch": 0.3101694915254237, + "grad_norm": 0.0, + "learning_rate": 3.755178907721281e-05, + "loss": 0.0, + "step": 366 + }, + { + "clip_ratio": 0.003547186963260174, + "epoch": 0.31101694915254235, + "grad_norm": 0.0, + "learning_rate": 3.753295668549906e-05, + "loss": 0.0, + "step": 367 + }, + { + "clip_ratio": 0.005322239827364683, + "epoch": 0.31186440677966104, + "grad_norm": 0.0, + "learning_rate": 3.7514124293785313e-05, + "loss": 0.0, + "step": 368 + }, + { + "clip_ratio": 0.004034657031297684, + "completion_length": 110.10714721679688, + "epoch": 0.31271186440677967, + "grad_norm": 0.12635496286827033, + "learning_rate": 3.7495291902071565e-05, + "loss": -0.0029, + "num_tokens": 1998890.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 369 + }, + { + "clip_ratio": 0.009292816743254662, + "epoch": 0.3135593220338983, + "grad_norm": 0.09312227257169194, + "learning_rate": 3.7476459510357816e-05, + "loss": -0.0051, + "step": 370 + }, + { + "clip_ratio": 0.03155684098601341, + "epoch": 0.31440677966101693, + "grad_norm": 0.05768111194452057, + "learning_rate": 3.745762711864407e-05, + "loss": -0.0068, + "step": 371 + }, + { + "clip_ratio": 0.0636262521147728, + "epoch": 0.3152542372881356, + "grad_norm": 0.047107030238279814, + "learning_rate": 3.7438794726930325e-05, + "loss": -0.0077, + "step": 372 + }, + { + "clip_ratio": 0.001232151291333139, + "completion_length": 234.87501525878906, + "epoch": 0.31610169491525425, + "grad_norm": 0.0, + "learning_rate": 
3.7419962335216576e-05, + "loss": 0.0, + "num_tokens": 2018547.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 373 + }, + { + "clip_ratio": 0.000776052416767925, + "epoch": 0.3169491525423729, + "grad_norm": 0.0, + "learning_rate": 3.740112994350283e-05, + "loss": 0.0, + "step": 374 + }, + { + "clip_ratio": 0.0008471307810395956, + "epoch": 0.3177966101694915, + "grad_norm": 0.0, + "learning_rate": 3.7382297551789085e-05, + "loss": 0.0, + "step": 375 + }, + { + "clip_ratio": 0.0025251915212720633, + "epoch": 0.31864406779661014, + "grad_norm": 0.0, + "learning_rate": 3.736346516007533e-05, + "loss": 0.0, + "step": 376 + }, + { + "clip_ratio": 0.0007399375317618251, + "completion_length": 256.4821472167969, + "epoch": 0.31949152542372883, + "grad_norm": 0.0, + "learning_rate": 3.734463276836159e-05, + "loss": 0.0, + "num_tokens": 2041454.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 377 + }, + { + "clip_ratio": 0.0008243753691203892, + "epoch": 0.32033898305084746, + "grad_norm": 0.0, + "learning_rate": 3.732580037664784e-05, + "loss": 0.0, + "step": 378 + }, + { + "clip_ratio": 0.0007023674552328885, + "epoch": 0.3211864406779661, + "grad_norm": 0.0, + "learning_rate": 3.730696798493409e-05, + "loss": 0.0, + "step": 379 + }, + { + "clip_ratio": 0.0021256571635603905, + "epoch": 0.3220338983050847, + "grad_norm": 0.0, + "learning_rate": 3.728813559322034e-05, + "loss": 0.0, + "step": 380 + }, + { + 
"clip_ratio": 0.00031836971174925566, + "completion_length": 330.5357360839844, + "epoch": 0.3228813559322034, + "grad_norm": 0.0, + "learning_rate": 3.72693032015066e-05, + "loss": 0.0, + "num_tokens": 2067788.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 381 + }, + { + "clip_ratio": 0.0005261868936941028, + "epoch": 0.32372881355932204, + "grad_norm": 0.0, + "learning_rate": 3.725047080979284e-05, + "loss": 0.0, + "step": 382 + }, + { + "clip_ratio": 0.00022253258794080466, + "epoch": 0.32457627118644067, + "grad_norm": 0.0, + "learning_rate": 3.72316384180791e-05, + "loss": 0.0, + "step": 383 + }, + { + "clip_ratio": 0.0006931009120307863, + "epoch": 0.3254237288135593, + "grad_norm": 0.0, + "learning_rate": 3.721280602636535e-05, + "loss": 0.0, + "step": 384 + }, + { + "clip_ratio": 0.003750877920538187, + "completion_length": 131.32144165039062, + "epoch": 0.326271186440678, + "grad_norm": 0.13631999098251835, + "learning_rate": 3.71939736346516e-05, + "loss": -0.0398, + "num_tokens": 2082318.0, + "reward": 0.9285714626312256, + "reward_std": 0.2020305097103119, + "rewards/check_winston_local_func/mean": 0.9285714030265808, + "rewards/check_winston_local_func/std": 0.3745126724243164, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 385 + }, + { + "clip_ratio": 0.008503442630171776, + "epoch": 0.3271186440677966, + "grad_norm": 0.10569445062232001, + "learning_rate": 3.717514124293786e-05, + "loss": -0.0422, + "step": 386 + }, + { + "clip_ratio": 0.023656172677874565, + "epoch": 0.32796610169491525, + "grad_norm": 
0.08440756273471868, + "learning_rate": 3.715630885122411e-05, + "loss": -0.0449, + "step": 387 + }, + { + "clip_ratio": 0.05569107085466385, + "epoch": 0.3288135593220339, + "grad_norm": 0.0732865060873415, + "learning_rate": 3.713747645951036e-05, + "loss": -0.0475, + "step": 388 + }, + { + "clip_ratio": 0.0007906121318228543, + "completion_length": 286.9821472167969, + "epoch": 0.32966101694915256, + "grad_norm": 0.12082254351176634, + "learning_rate": 3.711864406779661e-05, + "loss": 0.0063, + "num_tokens": 2106125.0, + "reward": 0.8928571939468384, + "reward_std": 0.23327529430389404, + "rewards/check_winston_local_func/mean": 0.8928571343421936, + "rewards/check_winston_local_func/std": 0.45441555976867676, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 389 + }, + { + "clip_ratio": 0.0015471117803826928, + "epoch": 0.3305084745762712, + "grad_norm": 0.11784337456998906, + "learning_rate": 3.7099811676082864e-05, + "loss": 0.0037, + "step": 390 + }, + { + "clip_ratio": 0.012488815933465958, + "epoch": 0.3313559322033898, + "grad_norm": 0.0941445528587045, + "learning_rate": 3.7080979284369115e-05, + "loss": 0.0003, + "step": 391 + }, + { + "clip_ratio": 0.028695791959762573, + "epoch": 0.33220338983050846, + "grad_norm": 0.08292380918920757, + "learning_rate": 3.706214689265537e-05, + "loss": -0.0027, + "step": 392 + }, + { + "clip_ratio": 0.0003602007054723799, + "completion_length": 239.83929443359375, + "epoch": 0.33305084745762714, + "grad_norm": 0.03143069205221857, + "learning_rate": 3.704331450094162e-05, + "loss": -0.0148, + "num_tokens": 2126076.0, + "reward": 0.9642857313156128, + "reward_std": 0.10101525485515594, + "rewards/check_winston_local_func/mean": 0.9642857313156128, + "rewards/check_winston_local_func/std": 0.26726123690605164, + "rewards/soft_format_reward_func/mean": 0.0, + 
"rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 393 + }, + { + "clip_ratio": 0.0013097112532705069, + "epoch": 0.3338983050847458, + "grad_norm": 0.03129928506196762, + "learning_rate": 3.7024482109227875e-05, + "loss": -0.0151, + "step": 394 + }, + { + "clip_ratio": 0.0041871643625199795, + "epoch": 0.3347457627118644, + "grad_norm": 0.030726291213562903, + "learning_rate": 3.7005649717514126e-05, + "loss": -0.0154, + "step": 395 + }, + { + "clip_ratio": 0.009951738640666008, + "epoch": 0.33559322033898303, + "grad_norm": 0.028938580269396656, + "learning_rate": 3.698681732580038e-05, + "loss": -0.0158, + "step": 396 + }, + { + "clip_ratio": 0.0010198758682236075, + "completion_length": 190.6607208251953, + "epoch": 0.3364406779661017, + "grad_norm": 0.0, + "learning_rate": 3.6967984934086635e-05, + "loss": 0.0, + "num_tokens": 2143449.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 397 + }, + { + "clip_ratio": 0.0006895315600559115, + "epoch": 0.33728813559322035, + "grad_norm": 0.0, + "learning_rate": 3.6949152542372886e-05, + "loss": 0.0, + "step": 398 + }, + { + "clip_ratio": 0.0038485280238091946, + "epoch": 0.338135593220339, + "grad_norm": 0.0, + "learning_rate": 3.693032015065914e-05, + "loss": 0.0, + "step": 399 + }, + { + "clip_ratio": 0.00733610987663269, + "epoch": 0.3389830508474576, + "grad_norm": 0.0, + "learning_rate": 3.691148775894539e-05, + "loss": 0.0, + "step": 400 + }, + { + "clip_ratio": 0.00022421723406296223, + "completion_length": 276.83929443359375, + "epoch": 0.3398305084745763, + "grad_norm": 0.0, + "learning_rate": 3.689265536723164e-05, + "loss": 
0.0, + "num_tokens": 2166472.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 401 + }, + { + "clip_ratio": 0.00020110365585424006, + "epoch": 0.34067796610169493, + "grad_norm": 0.0, + "learning_rate": 3.687382297551789e-05, + "loss": 0.0, + "step": 402 + }, + { + "clip_ratio": 0.00024886298342607915, + "epoch": 0.34152542372881356, + "grad_norm": 0.0, + "learning_rate": 3.685499058380415e-05, + "loss": 0.0, + "step": 403 + }, + { + "clip_ratio": 0.0005991093348711729, + "epoch": 0.3423728813559322, + "grad_norm": 0.0, + "learning_rate": 3.68361581920904e-05, + "loss": 0.0, + "step": 404 + }, + { + "clip_ratio": 0.0006365530425682664, + "completion_length": 191.17857360839844, + "epoch": 0.3432203389830508, + "grad_norm": 0.0, + "learning_rate": 3.681732580037665e-05, + "loss": 0.0, + "num_tokens": 2184146.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 405 + }, + { + "clip_ratio": 0.00040138812619261444, + "epoch": 0.3440677966101695, + "grad_norm": 0.0, + "learning_rate": 3.679849340866291e-05, + "loss": 0.0, + "step": 406 + }, + { + "clip_ratio": 0.0013467035023495555, + "epoch": 0.34491525423728814, + "grad_norm": 0.0, + "learning_rate": 3.677966101694915e-05, + "loss": 0.0, + "step": 407 + }, + { + "clip_ratio": 0.0009324097190983593, + "epoch": 0.34576271186440677, + "grad_norm": 0.0, + "learning_rate": 3.676082862523541e-05, + "loss": 0.0, + "step": 408 + }, + { + "clip_ratio": 
0.0026480748783797026, + "completion_length": 210.1428680419922, + "epoch": 0.3466101694915254, + "grad_norm": 0.0, + "learning_rate": 3.674199623352166e-05, + "loss": 0.0, + "num_tokens": 2203578.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 409 + }, + { + "clip_ratio": 0.001794831594452262, + "epoch": 0.3474576271186441, + "grad_norm": 0.0, + "learning_rate": 3.672316384180791e-05, + "loss": 0.0, + "step": 410 + }, + { + "clip_ratio": 0.0019218155648559332, + "epoch": 0.3483050847457627, + "grad_norm": 0.0, + "learning_rate": 3.6704331450094164e-05, + "loss": 0.0, + "step": 411 + }, + { + "clip_ratio": 0.002752742264419794, + "epoch": 0.34915254237288135, + "grad_norm": 0.0, + "learning_rate": 3.668549905838042e-05, + "loss": 0.0, + "step": 412 + }, + { + "clip_ratio": 0.001983263995498419, + "completion_length": 254.7857208251953, + "epoch": 0.35, + "grad_norm": 0.0, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.0, + "num_tokens": 2225574.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 413 + }, + { + "clip_ratio": 0.0027607560623437166, + "epoch": 0.35084745762711866, + "grad_norm": 0.0, + "learning_rate": 3.6647834274952924e-05, + "loss": 0.0, + "step": 414 + }, + { + "clip_ratio": 0.002160451840609312, + "epoch": 0.3516949152542373, + "grad_norm": 0.0, + "learning_rate": 3.6629001883239175e-05, + "loss": 0.0, + "step": 415 + }, + { + "clip_ratio": 0.002045322209596634, + "epoch": 
0.3525423728813559, + "grad_norm": 0.0, + "learning_rate": 3.6610169491525426e-05, + "loss": 0.0, + "step": 416 + }, + { + "clip_ratio": 0.001061619957908988, + "completion_length": 241.58929443359375, + "epoch": 0.35338983050847456, + "grad_norm": 0.0, + "learning_rate": 3.6591337099811684e-05, + "loss": 0.0, + "num_tokens": 2245623.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 417 + }, + { + "clip_ratio": 0.0012349070748314261, + "epoch": 0.35423728813559324, + "grad_norm": 0.0, + "learning_rate": 3.657250470809793e-05, + "loss": 0.0, + "step": 418 + }, + { + "clip_ratio": 0.0013292384101077914, + "epoch": 0.3550847457627119, + "grad_norm": 0.0, + "learning_rate": 3.6553672316384186e-05, + "loss": 0.0, + "step": 419 + }, + { + "clip_ratio": 0.001080949092283845, + "epoch": 0.3559322033898305, + "grad_norm": 0.0, + "learning_rate": 3.653483992467044e-05, + "loss": 0.0, + "step": 420 + }, + { + "clip_ratio": 0.0021271593868732452, + "completion_length": 207.94644165039062, + "epoch": 0.35677966101694913, + "grad_norm": 0.0, + "learning_rate": 3.651600753295669e-05, + "loss": 0.0, + "num_tokens": 2264492.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 421 + }, + { + "clip_ratio": 0.0023707763757556677, + "epoch": 0.3576271186440678, + "grad_norm": 0.0, + "learning_rate": 3.649717514124294e-05, + "loss": 0.0, + "step": 422 + }, + { + "clip_ratio": 0.001882536569610238, + "epoch": 0.35847457627118645, 
+ "grad_norm": 0.0, + "learning_rate": 3.64783427495292e-05, + "loss": 0.0, + "step": 423 + }, + { + "clip_ratio": 0.002321977633982897, + "epoch": 0.3593220338983051, + "grad_norm": 0.0, + "learning_rate": 3.645951035781544e-05, + "loss": 0.0, + "step": 424 + }, + { + "clip_ratio": 0.0014300509355962276, + "completion_length": 204.3928680419922, + "epoch": 0.3601694915254237, + "grad_norm": 0.0, + "learning_rate": 3.64406779661017e-05, + "loss": 0.0, + "num_tokens": 2283362.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 425 + }, + { + "clip_ratio": 0.001956741791218519, + "epoch": 0.3610169491525424, + "grad_norm": 0.0, + "learning_rate": 3.642184557438795e-05, + "loss": 0.0, + "step": 426 + }, + { + "clip_ratio": 0.0016578995855525136, + "epoch": 0.36186440677966103, + "grad_norm": 0.0, + "learning_rate": 3.64030131826742e-05, + "loss": 0.0, + "step": 427 + }, + { + "clip_ratio": 0.0019775168038904667, + "epoch": 0.36271186440677966, + "grad_norm": 0.0, + "learning_rate": 3.638418079096045e-05, + "loss": 0.0, + "step": 428 + }, + { + "clip_ratio": 0.001298850984312594, + "completion_length": 171.12501525878906, + "epoch": 0.3635593220338983, + "grad_norm": 0.0, + "learning_rate": 3.636534839924671e-05, + "loss": 0.0, + "num_tokens": 2300497.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 429 + }, + { + "clip_ratio": 0.001368111465126276, + "epoch": 0.3644067796610169, + "grad_norm": 0.0, + 
"learning_rate": 3.634651600753296e-05, + "loss": 0.0, + "step": 430 + }, + { + "clip_ratio": 0.0009136229637078941, + "epoch": 0.3652542372881356, + "grad_norm": 0.0, + "learning_rate": 3.632768361581921e-05, + "loss": 0.0, + "step": 431 + }, + { + "clip_ratio": 0.0010722121223807335, + "epoch": 0.36610169491525424, + "grad_norm": 0.0, + "learning_rate": 3.630885122410546e-05, + "loss": 0.0, + "step": 432 + }, + { + "clip_ratio": 0.0001718213752610609, + "completion_length": 236.62501525878906, + "epoch": 0.36694915254237287, + "grad_norm": 0.0, + "learning_rate": 3.6290018832391714e-05, + "loss": 0.0, + "num_tokens": 2322044.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 433 + }, + { + "clip_ratio": 0.0003989361284766346, + "epoch": 0.3677966101694915, + "grad_norm": 0.0, + "learning_rate": 3.627118644067797e-05, + "loss": 0.0, + "step": 434 + }, + { + "clip_ratio": 0.00034700697869993746, + "epoch": 0.3686440677966102, + "grad_norm": 0.0, + "learning_rate": 3.625235404896422e-05, + "loss": 0.0, + "step": 435 + }, + { + "clip_ratio": 0.0001743702741805464, + "epoch": 0.3694915254237288, + "grad_norm": 0.0, + "learning_rate": 3.6233521657250474e-05, + "loss": 0.0, + "step": 436 + }, + { + "clip_ratio": 0.002670450834557414, + "completion_length": 238.0357208251953, + "epoch": 0.37033898305084745, + "grad_norm": 0.0, + "learning_rate": 3.6214689265536725e-05, + "loss": 0.0, + "num_tokens": 2342662.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + 
"rewards/strict_format_reward_func/std": 0.0, + "step": 437 + }, + { + "clip_ratio": 0.002840070752426982, + "epoch": 0.3711864406779661, + "grad_norm": 0.0, + "learning_rate": 3.6195856873822976e-05, + "loss": 0.0, + "step": 438 + }, + { + "clip_ratio": 0.0031569055281579494, + "epoch": 0.37203389830508476, + "grad_norm": 0.0, + "learning_rate": 3.617702448210923e-05, + "loss": 0.0, + "step": 439 + }, + { + "clip_ratio": 0.002347626956179738, + "epoch": 0.3728813559322034, + "grad_norm": 0.0, + "learning_rate": 3.6158192090395485e-05, + "loss": 0.0, + "step": 440 + }, + { + "clip_ratio": 0.001670468831434846, + "completion_length": 260.2857360839844, + "epoch": 0.373728813559322, + "grad_norm": 0.0, + "learning_rate": 3.6139359698681736e-05, + "loss": 0.0, + "num_tokens": 2364478.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 441 + }, + { + "clip_ratio": 0.0009430252248421311, + "epoch": 0.37457627118644066, + "grad_norm": 0.0, + "learning_rate": 3.612052730696799e-05, + "loss": 0.0, + "step": 442 + }, + { + "clip_ratio": 0.001254075556062162, + "epoch": 0.37542372881355934, + "grad_norm": 0.0, + "learning_rate": 3.610169491525424e-05, + "loss": 0.0, + "step": 443 + }, + { + "clip_ratio": 0.0011122890282422304, + "epoch": 0.376271186440678, + "grad_norm": 0.0, + "learning_rate": 3.608286252354049e-05, + "loss": 0.0, + "step": 444 + }, + { + "clip_ratio": 0.0003623559314291924, + "completion_length": 211.82144165039062, + "epoch": 0.3771186440677966, + "grad_norm": 0.0, + "learning_rate": 3.606403013182675e-05, + "loss": 0.0, + "num_tokens": 2383820.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 
0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 445 + }, + { + "clip_ratio": 0.00031094998121261597, + "epoch": 0.37796610169491524, + "grad_norm": 0.0, + "learning_rate": 3.6045197740113e-05, + "loss": 0.0, + "step": 446 + }, + { + "clip_ratio": 0.0007522654486820102, + "epoch": 0.3788135593220339, + "grad_norm": 0.0, + "learning_rate": 3.602636534839925e-05, + "loss": 0.0, + "step": 447 + }, + { + "clip_ratio": 0.00031094998121261597, + "epoch": 0.37966101694915255, + "grad_norm": 0.0, + "learning_rate": 3.60075329566855e-05, + "loss": 0.0, + "step": 448 + }, + { + "clip_ratio": 0.00025147091946564615, + "completion_length": 198.21429443359375, + "epoch": 0.3805084745762712, + "grad_norm": 0.0, + "learning_rate": 3.598870056497175e-05, + "loss": 0.0, + "num_tokens": 2402592.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 449 + }, + { + "clip_ratio": 0.0006872185622341931, + "epoch": 0.3813559322033898, + "grad_norm": 0.0, + "learning_rate": 3.5969868173258e-05, + "loss": 0.0, + "step": 450 + }, + { + "clip_ratio": 0.0006297206855379045, + "epoch": 0.3822033898305085, + "grad_norm": 0.0, + "learning_rate": 3.595103578154426e-05, + "loss": 0.0, + "step": 451 + }, + { + "clip_ratio": 0.00026596483075991273, + "epoch": 0.38305084745762713, + "grad_norm": 0.0, + "learning_rate": 3.593220338983051e-05, + "loss": 0.0, + "step": 452 + }, + { + "clip_ratio": 0.001058344729244709, + "completion_length": 238.69644165039062, + "epoch": 0.38389830508474576, + "grad_norm": 0.0, + "learning_rate": 3.591337099811676e-05, + "loss": 0.0, + 
"num_tokens": 2423007.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 453 + }, + { + "clip_ratio": 0.0014913092600181699, + "epoch": 0.3847457627118644, + "grad_norm": 0.0, + "learning_rate": 3.589453860640302e-05, + "loss": 0.0, + "step": 454 + }, + { + "clip_ratio": 0.001653712592087686, + "epoch": 0.3855932203389831, + "grad_norm": 0.0, + "learning_rate": 3.5875706214689265e-05, + "loss": 0.0, + "step": 455 + }, + { + "clip_ratio": 0.0012562735937535763, + "epoch": 0.3864406779661017, + "grad_norm": 0.0, + "learning_rate": 3.585687382297552e-05, + "loss": 0.0, + "step": 456 + }, + { + "clip_ratio": 0.0019226728472858667, + "completion_length": 215.57144165039062, + "epoch": 0.38728813559322034, + "grad_norm": 0.0, + "learning_rate": 3.5838041431261774e-05, + "loss": 0.0, + "num_tokens": 2442303.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 457 + }, + { + "clip_ratio": 0.0007327733910642564, + "epoch": 0.38813559322033897, + "grad_norm": 0.0, + "learning_rate": 3.5819209039548025e-05, + "loss": 0.0, + "step": 458 + }, + { + "clip_ratio": 0.0009872624650597572, + "epoch": 0.3889830508474576, + "grad_norm": 0.0, + "learning_rate": 3.5800376647834276e-05, + "loss": 0.0, + "step": 459 + }, + { + "clip_ratio": 0.0015375473303720355, + "epoch": 0.3898305084745763, + "grad_norm": 0.0, + "learning_rate": 3.5781544256120534e-05, + "loss": 0.0, + "step": 460 + }, + { + "clip_ratio": 0.0025008555967360735, 
+ "completion_length": 174.6607208251953, + "epoch": 0.3906779661016949, + "grad_norm": 0.0, + "learning_rate": 3.576271186440678e-05, + "loss": 0.0, + "num_tokens": 2458980.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 461 + }, + { + "clip_ratio": 0.002855929546058178, + "epoch": 0.39152542372881355, + "grad_norm": 0.0, + "learning_rate": 3.5743879472693036e-05, + "loss": 0.0, + "step": 462 + }, + { + "clip_ratio": 0.0031251059845089912, + "epoch": 0.3923728813559322, + "grad_norm": 0.0, + "learning_rate": 3.572504708097929e-05, + "loss": 0.0, + "step": 463 + }, + { + "clip_ratio": 0.002123030601069331, + "epoch": 0.39322033898305087, + "grad_norm": 0.0, + "learning_rate": 3.570621468926554e-05, + "loss": 0.0, + "step": 464 + }, + { + "clip_ratio": 0.00038411590503528714, + "completion_length": 251.46429443359375, + "epoch": 0.3940677966101695, + "grad_norm": 0.0, + "learning_rate": 3.5687382297551796e-05, + "loss": 0.0, + "num_tokens": 2480318.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 465 + }, + { + "clip_ratio": 0.0006886526243761182, + "epoch": 0.3949152542372881, + "grad_norm": 0.0, + "learning_rate": 3.566854990583805e-05, + "loss": 0.0, + "step": 466 + }, + { + "clip_ratio": 0.0007815518183633685, + "epoch": 0.39576271186440676, + "grad_norm": 0.0, + "learning_rate": 3.56497175141243e-05, + "loss": 0.0, + "step": 467 + }, + { + "clip_ratio": 0.0003181493084412068, + "epoch": 
0.39661016949152544, + "grad_norm": 0.0, + "learning_rate": 3.563088512241055e-05, + "loss": 0.0, + "step": 468 + }, + { + "clip_ratio": 0.0013011619448661804, + "completion_length": 229.0178680419922, + "epoch": 0.3974576271186441, + "grad_norm": 0.0, + "learning_rate": 3.56120527306968e-05, + "loss": 0.0, + "num_tokens": 2500487.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 469 + }, + { + "clip_ratio": 0.0009047752828337252, + "epoch": 0.3983050847457627, + "grad_norm": 0.0, + "learning_rate": 3.559322033898305e-05, + "loss": 0.0, + "step": 470 + }, + { + "clip_ratio": 0.0014872046886011958, + "epoch": 0.39915254237288134, + "grad_norm": 0.0, + "learning_rate": 3.557438794726931e-05, + "loss": 0.0, + "step": 471 + }, + { + "clip_ratio": 0.0017439923249185085, + "epoch": 0.4, + "grad_norm": 0.0, + "learning_rate": 3.555555555555555e-05, + "loss": 0.0, + "step": 472 + }, + { + "clip_ratio": 0.0008359851781278849, + "completion_length": 224.00001525878906, + "epoch": 0.40084745762711865, + "grad_norm": 0.0, + "learning_rate": 3.553672316384181e-05, + "loss": 0.0, + "num_tokens": 2520007.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 473 + }, + { + "clip_ratio": 0.0003327638260088861, + "epoch": 0.4016949152542373, + "grad_norm": 0.0, + "learning_rate": 3.551789077212806e-05, + "loss": 0.0, + "step": 474 + }, + { + "clip_ratio": 0.0008026210125535727, + "epoch": 0.4025423728813559, + "grad_norm": 
0.0, + "learning_rate": 3.549905838041431e-05, + "loss": 0.0, + "step": 475 + }, + { + "clip_ratio": 0.000616455334238708, + "epoch": 0.4033898305084746, + "grad_norm": 0.0, + "learning_rate": 3.548022598870057e-05, + "loss": 0.0, + "step": 476 + }, + { + "clip_ratio": 0.0010224612196907401, + "completion_length": 212.6607208251953, + "epoch": 0.40423728813559323, + "grad_norm": 0.0, + "learning_rate": 3.546139359698682e-05, + "loss": 0.0, + "num_tokens": 2539756.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 477 + }, + { + "clip_ratio": 0.001049195067025721, + "epoch": 0.40508474576271186, + "grad_norm": 0.0, + "learning_rate": 3.544256120527307e-05, + "loss": 0.0, + "step": 478 + }, + { + "clip_ratio": 0.0010224612196907401, + "epoch": 0.4059322033898305, + "grad_norm": 0.0, + "learning_rate": 3.5423728813559324e-05, + "loss": 0.0, + "step": 479 + }, + { + "clip_ratio": 0.0011091463966295123, + "epoch": 0.4067796610169492, + "grad_norm": 0.0, + "learning_rate": 3.5404896421845575e-05, + "loss": 0.0, + "step": 480 + }, + { + "clip_ratio": 0.0008076262311078608, + "completion_length": 188.33929443359375, + "epoch": 0.4076271186440678, + "grad_norm": 0.0, + "learning_rate": 3.5386064030131826e-05, + "loss": 0.0, + "num_tokens": 2557479.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 481 + }, + { + "clip_ratio": 0.0010148414876312017, + "epoch": 0.40847457627118644, + "grad_norm": 0.0, + 
"learning_rate": 3.5367231638418084e-05, + "loss": 0.0, + "step": 482 + }, + { + "clip_ratio": 0.00107091898098588, + "epoch": 0.40932203389830507, + "grad_norm": 0.0, + "learning_rate": 3.5348399246704335e-05, + "loss": 0.0, + "step": 483 + }, + { + "clip_ratio": 0.001206585788168013, + "epoch": 0.4101694915254237, + "grad_norm": 0.0, + "learning_rate": 3.5329566854990586e-05, + "loss": 0.0, + "step": 484 + }, + { + "clip_ratio": 0.0006591131095774472, + "completion_length": 202.25001525878906, + "epoch": 0.4110169491525424, + "grad_norm": 0.0, + "learning_rate": 3.5310734463276844e-05, + "loss": 0.0, + "num_tokens": 2575709.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 485 + }, + { + "clip_ratio": 0.0011115194065496325, + "epoch": 0.411864406779661, + "grad_norm": 0.0, + "learning_rate": 3.529190207156309e-05, + "loss": 0.0, + "step": 486 + }, + { + "clip_ratio": 0.0010618160013109446, + "epoch": 0.41271186440677965, + "grad_norm": 0.0, + "learning_rate": 3.5273069679849346e-05, + "loss": 0.0, + "step": 487 + }, + { + "clip_ratio": 0.001367724034935236, + "epoch": 0.4135593220338983, + "grad_norm": 0.0, + "learning_rate": 3.52542372881356e-05, + "loss": 0.0, + "step": 488 + }, + { + "clip_ratio": 0.00175467727240175, + "completion_length": 132.1428680419922, + "epoch": 0.41440677966101697, + "grad_norm": 0.0, + "learning_rate": 3.523540489642185e-05, + "loss": 0.0, + "num_tokens": 2590205.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + 
"rewards/strict_format_reward_func/std": 0.0, + "step": 489 + }, + { + "clip_ratio": 0.0024672539439052343, + "epoch": 0.4152542372881356, + "grad_norm": 0.0, + "learning_rate": 3.52165725047081e-05, + "loss": 0.0, + "step": 490 + }, + { + "clip_ratio": 0.001686076750047505, + "epoch": 0.4161016949152542, + "grad_norm": 0.0, + "learning_rate": 3.519774011299436e-05, + "loss": 0.0, + "step": 491 + }, + { + "clip_ratio": 0.001242591068148613, + "epoch": 0.41694915254237286, + "grad_norm": 0.0, + "learning_rate": 3.51789077212806e-05, + "loss": 0.0, + "step": 492 + }, + { + "clip_ratio": 0.00047669216291978955, + "completion_length": 206.7857208251953, + "epoch": 0.41779661016949154, + "grad_norm": 0.0, + "learning_rate": 3.516007532956686e-05, + "loss": 0.0, + "num_tokens": 2608545.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 493 + }, + { + "clip_ratio": 0.0006388615584000945, + "epoch": 0.4186440677966102, + "grad_norm": 0.0, + "learning_rate": 3.514124293785311e-05, + "loss": 0.0, + "step": 494 + }, + { + "clip_ratio": 0.0007421272457577288, + "epoch": 0.4194915254237288, + "grad_norm": 0.0, + "learning_rate": 3.512241054613936e-05, + "loss": 0.0, + "step": 495 + }, + { + "clip_ratio": 0.0011416444322094321, + "epoch": 0.42033898305084744, + "grad_norm": 0.0, + "learning_rate": 3.510357815442562e-05, + "loss": 0.0, + "step": 496 + }, + { + "clip_ratio": 0.0008945852750912309, + "completion_length": 218.6428680419922, + "epoch": 0.4211864406779661, + "grad_norm": 0.0, + "learning_rate": 3.5084745762711864e-05, + "loss": 0.0, + "num_tokens": 2628469.0, + "reward": 1.0, + "reward_std": 0.0, + "rewards/check_winston_local_func/mean": 1.0, + "rewards/check_winston_local_func/std": 
0.0, + "rewards/soft_format_reward_func/mean": 0.0, + "rewards/soft_format_reward_func/std": 0.0, + "rewards/strict_format_reward_func/mean": 0.0, + "rewards/strict_format_reward_func/std": 0.0, + "step": 497 + }, + { + "clip_ratio": 0.0007813043775968254, + "epoch": 0.42203389830508475, + "grad_norm": 0.0, + "learning_rate": 3.506591337099812e-05, + "loss": 0.0, + "step": 498 + }, + { + "clip_ratio": 0.0005737429019063711, + "epoch": 0.4228813559322034, + "grad_norm": 0.0, + "learning_rate": 3.504708097928437e-05, + "loss": 0.0, + "step": 499 + }, + { + "clip_ratio": 0.0009029792272485793, + "epoch": 0.423728813559322, + "grad_norm": 0.0, + "learning_rate": 3.5028248587570624e-05, + "loss": 0.0, + "step": 500 + } + ], + "logging_steps": 1, + "max_steps": 2360, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1261db78e0a310bb2e0bd6333e2741bd2c4391ea --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3bd8ba987ac3c91f8253f49fc4f0e162f8c1db67922f9a6a6a7ad4757383ff +size 7544 diff --git a/checkpoint-500/zero_to_fp32.py b/checkpoint-500/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/checkpoint-500/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. 
+# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = 
os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = 
state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensor instead of torch tensor, which is more memory efficient. + Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters)