diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..74b51875dbebcc908a68346cc6ef6cab8075d86e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-25/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-4/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-50/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-75/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +training_curves.png filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fd22dc59267fb8ead4093f0e075f0fa1d6604eb0 --- /dev/null +++ b/README.md @@ -0,0 +1,73 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +model_name: defender-grpo +tags: +- base_model:adapter:unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +- grpo +- lora +- transformers +- trl +- unsloth +license: llama3.1 +pipeline_tag: text-generation +--- + +# Model Card for defender-grpo + +This model is a fine-tuned version of [unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit](https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" 
+generator = pipeline("text-generation", model="<your-hf-username>/defender-grpo", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/n0s0ktesting-testing-labs/memex-grpo/runs/m7t3s1qv) + + +This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300). + +### Framework versions + +- PEFT: 0.18.1 +- TRL: 0.24.0 +- Transformers: 5.5.0 +- Pytorch: 2.10.0+cu128 +- Datasets: 4.3.0 +- Tokenizers: 0.22.2 + +## Citations + +Cite GRPO as: + +```bibtex +@article{shao2024deepseekmath, + title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}}, + author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo}, + year = 2024, + eprint = {arXiv:2402.03300}, +} + +``` + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0578d0149020358a61e1ca953a43cbd73121376f --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,50 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "LlamaForCausalLM", + "parent_library": "transformers.models.llama.modeling_llama", + "unsloth_fixed": true + }, + 
"base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "v_proj", + "up_proj", + "k_proj", + "down_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2effa52b11f66a027719878917fb719738f96f1b --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8490b3cc93d1fdc69a2391c241a0e318ab796aade2dd8426fa744d48388f6c3d +size 167832240 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so 
we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..c1faba2efaacd24a9c0a637ded9575527657860d --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +- grpo +- lora +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..0578d0149020358a61e1ca953a43cbd73121376f --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,50 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "LlamaForCausalLM", + "parent_library": "transformers.models.llama.modeling_llama", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "v_proj", + "up_proj", + "k_proj", + "down_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2effa52b11f66a027719878917fb719738f96f1b --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8490b3cc93d1fdc69a2391c241a0e318ab796aade2dd8426fa744d48388f6c3d +size 167832240 diff --git a/checkpoint-100/chat_template.jinja b/checkpoint-100/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- 
/dev/null +++ b/checkpoint-100/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} 
+ {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac0df1015d94d7b815fc49b7518f66feffce9e56 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1748401cbf323eca17f738d82e7e55a02b502f79f2e04cae154f833784cf04b6 +size 85728229 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ad6c55c6daa142ece86150c56dad42cb5f1711ac --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208a400a8cfa1d7765e89820ba09555d66dc4e92af3c8df24d0a48bf843a020e +size 14581 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..215c5d2069bd81cb35727ebca07a510ac59c9d94 --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4393a84a3109995aa1202073b039b12062e3189ed89aa0b94ef0510ba843009 +size 1383 diff --git 
a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4a578581bb86b813d572f2d39ee6b95eb81bbcf --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4412fe7416c13987e8bfa6d9728efd24101a8414c3408f7c05f019d719b0da8c +size 1465 diff --git a/checkpoint-100/tokenizer.json b/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5b9e375b2b4e8c4210d9d8a8a8d0642d1f715076 --- /dev/null +++ b/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65c6c5f9764771aa485e6a1f5e63d7d9af8477fe0777148c17476ecb2e09a05 +size 17210099 diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a55a3083a0efb3c15b58aa6c3517ac5dfc6d1ca --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1,2068 @@ +{ + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "from_slow": true, + "is_local": false, + "legacy": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "TokenizersBackend", + "unk_token": null, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128003": { + "content": 
"<|reserved_special_token_1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "single_word": 
false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", 
+ "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128051": { + "content": 
"<|reserved_special_token_43|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + 
"128063": { + "content": "<|reserved_special_token_55|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128122": { + "content": 
"<|reserved_special_token_114|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128134": { + "content": "<|reserved_special_token_126|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128181": { + "content": 
"<|reserved_special_token_173|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128193": { + "content": "<|reserved_special_token_185|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128240": { + "content": 
"<|reserved_special_token_232|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128252": { + "content": "<|reserved_special_token_244|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + } +} diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ceaad2f601525c11b4653dc28d46a1fb7dc60b1d --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,3334 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 659.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 821.0, + "completions/max_terminated_length": 821.0, + "completions/mean_length": 659.0, + "completions/mean_terminated_length": 659.0, + "completions/min_length": 516.0, + "completions/min_terminated_length": 516.0, + "epoch": 0.01, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13492469489574432, + "kl": 6.197717993927654e-06, + "learning_rate": 0.0, + "loss": 2.8312206268310547e-07, + "num_tokens": 5736.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": 
-0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 1 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 543.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 693.0, + "completions/max_terminated_length": 693.0, + "completions/mean_length": 543.75, + "completions/mean_terminated_length": 543.75, + "completions/min_length": 426.0, + "completions/min_terminated_length": 426.0, + "epoch": 0.02, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1295754313468933, + "kl": 7.463787596861948e-06, + "learning_rate": 5.000000000000001e-07, + "loss": 3.725290298461914e-07, + "num_tokens": 11019.0, + "reward": 0.15250001847743988, + "reward_std": 0.7683477997779846, + "rewards/reward_environment_execution/mean": -0.09749999642372131, + "rewards/reward_environment_execution/std": 0.2683747410774231, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 2 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 572.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 677.0, + 
"completions/max_terminated_length": 677.0, + "completions/mean_length": 572.75, + "completions/mean_terminated_length": 572.75, + "completions/min_length": 490.0, + "completions/min_terminated_length": 490.0, + "epoch": 0.03, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1402619630098343, + "kl": 5.991519174131099e-06, + "learning_rate": 1.0000000000000002e-06, + "loss": 2.2351741790771484e-07, + "num_tokens": 16418.0, + "reward": 0.5375000238418579, + "reward_std": 0.004999995231628418, + "rewards/reward_environment_execution/mean": 0.03750000149011612, + "rewards/reward_environment_execution/std": 0.004999999888241291, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 3 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 705.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 783.0, + "completions/max_terminated_length": 783.0, + "completions/mean_length": 705.0, + "completions/mean_terminated_length": 705.0, + "completions/min_length": 634.0, + "completions/min_terminated_length": 634.0, + "epoch": 0.04, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13879841566085815, + "kl": 6.360480483635911e-06, + "learning_rate": 1.5e-06, + "loss": 2.980232238769531e-07, + "num_tokens": 22514.0, + "reward": 0.24392502009868622, + "reward_std": 0.8475051522254944, + "rewards/reward_environment_execution/mean": 0.04392500966787338, + "rewards/reward_environment_execution/std": 0.4515362083911896, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + 
"rewards/reward_investigation_quality/mean": 0.17500001192092896, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 4 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 677.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 785.0, + "completions/max_terminated_length": 785.0, + "completions/mean_length": 677.0, + "completions/mean_terminated_length": 677.0, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.05, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.10656019300222397, + "kl": 4.833805178350303e-06, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.2351741790771484e-07, + "num_tokens": 28326.0, + "reward": 0.15250001847743988, + "reward_std": 0.7683477997779846, + "rewards/reward_environment_execution/mean": -0.09750000387430191, + "rewards/reward_environment_execution/std": 0.2683747410774231, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 5 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 701.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 913.0, + "completions/max_terminated_length": 913.0, + "completions/mean_length": 701.0, + "completions/mean_terminated_length": 701.0, + "completions/min_length": 553.0, + "completions/min_terminated_length": 553.0, + "epoch": 0.06, + "frac_reward_zero_std": 
1.0, + "grad_norm": 6.133589340606704e-05, + "kl": 6.346002919599414e-06, + "learning_rate": 2.5e-06, + "loss": 2.5384014179508085e-07, + "num_tokens": 34230.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": -0.5, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": -0.5, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.0, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 6 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 524.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 640.0, + "completions/max_terminated_length": 640.0, + "completions/mean_length": 524.0, + "completions/mean_terminated_length": 524.0, + "completions/min_length": 450.0, + "completions/min_terminated_length": 450.0, + "epoch": 0.07, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13970911502838135, + "kl": 6.830848860772676e-06, + "learning_rate": 3e-06, + "loss": 2.8312206268310547e-07, + "num_tokens": 39426.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 7 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, 
+ "completion_length": 626.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 725.0, + "completions/max_terminated_length": 725.0, + "completions/mean_length": 626.0, + "completions/mean_terminated_length": 626.0, + "completions/min_length": 537.0, + "completions/min_terminated_length": 537.0, + "epoch": 0.08, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12764990329742432, + "kl": 5.7269965054729255e-06, + "learning_rate": 3.5e-06, + "loss": 2.1606683731079102e-07, + "num_tokens": 45026.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 8 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 669.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 752.0, + "completions/max_terminated_length": 752.0, + "completions/mean_length": 669.5, + "completions/mean_terminated_length": 669.5, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.09, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1321861445903778, + "kl": 5.624260097647493e-06, + "learning_rate": 4.000000000000001e-06, + "loss": 2.086162567138672e-07, + "num_tokens": 50812.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + 
"rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 9 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 640.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 790.0, + "completions/max_terminated_length": 790.0, + "completions/mean_length": 640.25, + "completions/mean_terminated_length": 640.25, + "completions/min_length": 471.0, + "completions/min_terminated_length": 471.0, + "epoch": 0.1, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12317467480897903, + "kl": 6.860862526991696e-06, + "learning_rate": 4.5e-06, + "loss": 3.0547380447387695e-07, + "num_tokens": 56485.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 664.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 855.0, + "completions/max_terminated_length": 855.0, + "completions/mean_length": 664.75, + "completions/mean_terminated_length": 664.75, + 
"completions/min_length": 559.0, + "completions/min_terminated_length": 559.0, + "epoch": 0.11, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1155354306101799, + "kl": 6.481316631834488e-06, + "learning_rate": 5e-06, + "loss": 2.682209014892578e-07, + "num_tokens": 62252.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 11 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 699.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 880.0, + "completions/max_terminated_length": 880.0, + "completions/mean_length": 699.5, + "completions/mean_terminated_length": 699.5, + "completions/min_length": 539.0, + "completions/min_terminated_length": 539.0, + "epoch": 0.12, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11765824258327484, + "kl": 5.136041295372706e-06, + "learning_rate": 4.99847706754774e-06, + "loss": 1.862645149230957e-07, + "num_tokens": 68150.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + 
"rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 12 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 753.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 913.0, + "completions/max_terminated_length": 913.0, + "completions/mean_length": 753.5, + "completions/mean_terminated_length": 753.5, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.13, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1163109764456749, + "kl": 6.4769044456625124e-06, + "learning_rate": 4.993910125649561e-06, + "loss": 2.682209014892578e-07, + "num_tokens": 74436.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 13 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 691.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 870.0, + "completions/max_terminated_length": 870.0, + "completions/mean_length": 691.25, + "completions/mean_terminated_length": 691.25, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.14, + "frac_reward_zero_std": 1.0, + "grad_norm": 5.699428584193811e-05, + "kl": 4.6748977524657676e-06, + "learning_rate": 4.986304738420684e-06, + "loss": 
1.869958907718683e-07, + "num_tokens": 80465.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 14 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 588.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 832.0, + "completions/max_terminated_length": 832.0, + "completions/mean_length": 588.0, + "completions/mean_terminated_length": 588.0, + "completions/min_length": 432.0, + "completions/min_terminated_length": 432.0, + "epoch": 0.15, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00010395531717222184, + "kl": 9.099214707930514e-06, + "learning_rate": 4.975670171853926e-06, + "loss": 3.639685814960103e-07, + "num_tokens": 86089.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": -0.5, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": -0.5, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.0, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 15 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 607.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 673.0, + "completions/max_terminated_length": 673.0, + 
"completions/mean_length": 607.75, + "completions/mean_terminated_length": 607.75, + "completions/min_length": 517.0, + "completions/min_terminated_length": 517.0, + "epoch": 0.16, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1489061564207077, + "kl": 9.158140301224194e-06, + "learning_rate": 4.962019382530521e-06, + "loss": 3.2782554626464844e-07, + "num_tokens": 91620.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 16 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 657.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 737.0, + "completions/max_terminated_length": 737.0, + "completions/mean_length": 657.25, + "completions/mean_terminated_length": 657.25, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.17, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14814995229244232, + "kl": 1.14353706521797e-05, + "learning_rate": 4.9453690018345144e-06, + "loss": 4.470348358154297e-07, + "num_tokens": 97513.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + 
"rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 17 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 549.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 621.0, + "completions/max_terminated_length": 621.0, + "completions/mean_length": 549.25, + "completions/mean_terminated_length": 549.25, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.18, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13028942048549652, + "kl": 1.1230136237827537e-05, + "learning_rate": 4.925739315689991e-06, + "loss": 4.6193599700927734e-07, + "num_tokens": 102810.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 18 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 565.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 686.0, + "completions/max_terminated_length": 686.0, + "completions/mean_length": 565.5, + "completions/mean_terminated_length": 565.5, + "completions/min_length": 288.0, + "completions/min_terminated_length": 288.0, + "epoch": 0.19, + 
"frac_reward_zero_std": 0.0, + "grad_norm": 0.2010871171951294, + "kl": 1.3259304523671744e-05, + "learning_rate": 4.903154239845798e-06, + "loss": 5.438923835754395e-07, + "num_tokens": 108344.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 19 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 692.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 777.0, + "completions/max_terminated_length": 777.0, + "completions/mean_length": 692.25, + "completions/mean_terminated_length": 692.25, + "completions/min_length": 633.0, + "completions/min_terminated_length": 633.0, + "epoch": 0.2, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12407222390174866, + "kl": 1.1413530501158675e-05, + "learning_rate": 4.8776412907378845e-06, + "loss": 4.6938657760620117e-07, + "num_tokens": 114393.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 
0.0, + "step": 20 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 633.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 702.0, + "completions/max_terminated_length": 702.0, + "completions/mean_length": 633.0, + "completions/mean_terminated_length": 633.0, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.21, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14354389905929565, + "kl": 1.5171975746852695e-05, + "learning_rate": 4.849231551964771e-06, + "loss": 6.407499313354492e-07, + "num_tokens": 120177.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 21 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 669.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 978.0, + "completions/max_terminated_length": 978.0, + "completions/mean_length": 669.0, + "completions/mean_terminated_length": 669.0, + "completions/min_length": 444.0, + "completions/min_terminated_length": 444.0, + "epoch": 0.22, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12197419255971909, + "kl": 1.5716058214820805e-05, + "learning_rate": 4.817959636416969e-06, + "loss": 6.183981895446777e-07, + "num_tokens": 126125.0, + "reward": 0.1550000160932541, + 
"reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 22 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 774.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1024.0, + "completions/max_terminated_length": 1024.0, + "completions/mean_length": 774.25, + "completions/mean_terminated_length": 774.25, + "completions/min_length": 484.0, + "completions/min_terminated_length": 484.0, + "epoch": 0.23, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1378677934408188, + "kl": 2.0351369357740623e-05, + "learning_rate": 4.783863644106502e-06, + "loss": 8.493661880493164e-07, + "num_tokens": 132326.0, + "reward": -0.21249999105930328, + "reward_std": 0.9097756743431091, + "rewards/reward_environment_execution/mean": -0.23750001192092896, + "rewards/reward_environment_execution/std": 0.30335623025894165, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.125, + "rewards/reward_investigation_quality/std": 0.15000000596046448, + "rewards/reward_os_mechanics/mean": 0.05000000074505806, + "rewards/reward_os_mechanics/std": 0.10000000149011612, + "step": 23 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 565.75, + 
"completions/clipped_ratio": 0.0, + "completions/max_length": 732.0, + "completions/max_terminated_length": 732.0, + "completions/mean_length": 565.75, + "completions/mean_terminated_length": 565.75, + "completions/min_length": 314.0, + "completions/min_terminated_length": 314.0, + "epoch": 0.24, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1561768352985382, + "kl": 2.7805408535641618e-05, + "learning_rate": 4.746985115747918e-06, + "loss": 1.1026859283447266e-06, + "num_tokens": 137685.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 24 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 668.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 909.0, + "completions/max_terminated_length": 909.0, + "completions/mean_length": 668.75, + "completions/mean_terminated_length": 668.75, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.25, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13285094499588013, + "kl": 2.5312373509223107e-05, + "learning_rate": 4.707368982147318e-06, + "loss": 1.0132789611816406e-06, + "num_tokens": 143456.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + 
"rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 25 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 487.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 559.0, + "completions/max_terminated_length": 559.0, + "completions/mean_length": 487.25, + "completions/mean_terminated_length": 487.25, + "completions/min_length": 420.0, + "completions/min_terminated_length": 420.0, + "epoch": 0.26, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.22987650334835052, + "kl": 2.307988370375824e-05, + "learning_rate": 4.665063509461098e-06, + "loss": 9.5367431640625e-07, + "num_tokens": 148505.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 26 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 667.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 782.0, + "completions/max_terminated_length": 782.0, + "completions/mean_length": 667.25, + "completions/mean_terminated_length": 
667.25, + "completions/min_length": 533.0, + "completions/min_terminated_length": 533.0, + "epoch": 0.27, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12192773073911667, + "kl": 2.0764900455105817e-05, + "learning_rate": 4.620120240391065e-06, + "loss": 8.642673492431641e-07, + "num_tokens": 154454.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 27 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 722.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 896.0, + "completions/max_terminated_length": 896.0, + "completions/mean_length": 722.0, + "completions/mean_terminated_length": 722.0, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.28, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11616191267967224, + "kl": 2.81208617707307e-05, + "learning_rate": 4.572593931387604e-06, + "loss": 1.1026859283447266e-06, + "num_tokens": 160446.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 
0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 28 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 686.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 743.0, + "completions/max_terminated_length": 743.0, + "completions/mean_length": 686.0, + "completions/mean_terminated_length": 686.0, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.29, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13982541859149933, + "kl": 2.6462233336133067e-05, + "learning_rate": 4.522542485937369e-06, + "loss": 1.043081283569336e-06, + "num_tokens": 166286.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 29 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 673.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 847.0, + "completions/max_terminated_length": 847.0, + "completions/mean_length": 673.75, + "completions/mean_terminated_length": 673.75, + "completions/min_length": 536.0, + "completions/min_terminated_length": 536.0, + "epoch": 0.3, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17358987033367157, + "kl": 4.3880153043573955e-05, + "learning_rate": 
4.470026884016805e-06, + "loss": 1.9818544387817383e-06, + "num_tokens": 172089.0, + "reward": 0.6325000524520874, + "reward_std": 0.14221462607383728, + "rewards/reward_environment_execution/mean": 0.057500001043081284, + "rewards/reward_environment_execution/std": 0.056789085268974304, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.25, + "rewards/reward_investigation_quality/std": 0.057735033333301544, + "rewards/reward_os_mechanics/mean": 0.125, + "rewards/reward_os_mechanics/std": 0.15000000596046448, + "step": 30 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 648.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 858.0, + "completions/max_terminated_length": 858.0, + "completions/mean_length": 648.0, + "completions/mean_terminated_length": 648.0, + "completions/min_length": 461.0, + "completions/min_terminated_length": 461.0, + "epoch": 0.31, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1460961103439331, + "kl": 3.440224281803239e-05, + "learning_rate": 4.415111107797445e-06, + "loss": 1.3560056686401367e-06, + "num_tokens": 177773.0, + "reward": -0.2800000011920929, + "reward_std": 0.8353841304779053, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.125, + "rewards/reward_investigation_quality/std": 0.15000000596046448, + "rewards/reward_os_mechanics/mean": -0.02500000037252903, + "rewards/reward_os_mechanics/std": 0.05000000074505806, + "step": 31 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + 
"clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 832.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1227.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 832.5, + "completions/mean_terminated_length": 832.5, + "completions/min_length": 628.0, + "completions/min_terminated_length": 628.0, + "epoch": 0.32, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0001798434095690027, + "kl": 3.118666154477978e-05, + "learning_rate": 4.357862063693486e-06, + "loss": 1.2474664572437177e-06, + "num_tokens": 184203.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 32 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 519.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 580.0, + "completions/max_terminated_length": 580.0, + "completions/mean_length": 519.0, + "completions/mean_terminated_length": 519.0, + "completions/min_length": 435.0, + "completions/min_terminated_length": 435.0, + "epoch": 0.33, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0003187077818438411, + "kl": 5.177023012947757e-05, + "learning_rate": 4.2983495008466285e-06, + "loss": 2.0708091597043676e-06, + "num_tokens": 189387.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + 
"rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 33 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 670.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 767.0, + "completions/max_terminated_length": 767.0, + "completions/mean_length": 670.5, + "completions/mean_terminated_length": 670.5, + "completions/min_length": 585.0, + "completions/min_terminated_length": 585.0, + "epoch": 0.34, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12342657893896103, + "kl": 3.6297568385634804e-05, + "learning_rate": 4.236645926147493e-06, + "loss": 1.4156103134155273e-06, + "num_tokens": 195333.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 34 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 464.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 645.0, + "completions/max_terminated_length": 645.0, + "completions/mean_length": 464.5, + "completions/mean_terminated_length": 464.5, + 
"completions/min_length": 195.0, + "completions/min_terminated_length": 195.0, + "epoch": 0.35, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14799511432647705, + "kl": 9.590507397660986e-05, + "learning_rate": 4.172826515897146e-06, + "loss": 3.822147846221924e-06, + "num_tokens": 200295.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 35 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 539.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 667.0, + "completions/max_terminated_length": 667.0, + "completions/mean_length": 539.75, + "completions/mean_terminated_length": 539.75, + "completions/min_length": 403.0, + "completions/min_terminated_length": 403.0, + "epoch": 0.36, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15325292944908142, + "kl": 9.70982619037386e-05, + "learning_rate": 4.106969024216348e-06, + "loss": 3.919005393981934e-06, + "num_tokens": 205554.0, + "reward": -0.23249998688697815, + "reward_std": 0.8862420320510864, + "rewards/reward_environment_execution/mean": -0.23250000178813934, + "rewards/reward_environment_execution/std": 0.3089093565940857, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 
0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 36 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 544.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 787.0, + "completions/max_terminated_length": 787.0, + "completions/mean_length": 544.5, + "completions/mean_terminated_length": 544.5, + "completions/min_length": 333.0, + "completions/min_terminated_length": 333.0, + "epoch": 0.37, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15045017004013062, + "kl": 7.959679260238772e-05, + "learning_rate": 4.039153688314146e-06, + "loss": 3.159046173095703e-06, + "num_tokens": 210828.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 37 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 638.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 695.0, + "completions/max_terminated_length": 695.0, + "completions/mean_length": 638.75, + "completions/mean_terminated_length": 638.75, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.38, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.000358834135113284, + "kl": 6.0015446706529474e-05, + "learning_rate": 
3.969463130731183e-06, + "loss": 2.4006176317925565e-06, + "num_tokens": 216647.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 38 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 555.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 891.0, + "completions/max_terminated_length": 891.0, + "completions/mean_length": 555.75, + "completions/mean_terminated_length": 555.75, + "completions/min_length": 389.0, + "completions/min_terminated_length": 389.0, + "epoch": 0.39, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.20931142568588257, + "kl": 6.502814630948706e-05, + "learning_rate": 3.897982258676867e-06, + "loss": 2.6226043701171875e-06, + "num_tokens": 221986.0, + "reward": 0.5375000238418579, + "reward_std": 0.004999995231628418, + "rewards/reward_environment_execution/mean": 0.03750000149011612, + "rewards/reward_environment_execution/std": 0.004999999888241291, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 39 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 
638.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 786.0, + "completions/max_terminated_length": 786.0, + "completions/mean_length": 638.5, + "completions/mean_terminated_length": 638.5, + "completions/min_length": 469.0, + "completions/min_terminated_length": 469.0, + "epoch": 0.4, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1422736942768097, + "kl": 0.0001064265761669958, + "learning_rate": 3.824798160583012e-06, + "loss": 4.246830940246582e-06, + "num_tokens": 227812.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 40 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 729.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 839.0, + "completions/max_terminated_length": 839.0, + "completions/mean_length": 729.5, + "completions/mean_terminated_length": 729.5, + "completions/min_length": 608.0, + "completions/min_terminated_length": 608.0, + "epoch": 0.41, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11686036735773087, + "kl": 8.182951478374889e-05, + "learning_rate": 3.7500000000000005e-06, + "loss": 3.3080577850341797e-06, + "num_tokens": 233834.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + 
"rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 41 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 582.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 772.0, + "completions/max_terminated_length": 772.0, + "completions/mean_length": 582.0, + "completions/mean_terminated_length": 582.0, + "completions/min_length": 430.0, + "completions/min_terminated_length": 430.0, + "epoch": 0.42, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15176120400428772, + "kl": 0.00013200526700529736, + "learning_rate": 3.6736789069647273e-06, + "loss": 5.21540641784668e-06, + "num_tokens": 239438.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 42 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 797.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 949.0, + "completions/max_terminated_length": 949.0, + "completions/mean_length": 797.75, + "completions/mean_terminated_length": 
797.75, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.43, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.10845938324928284, + "kl": 5.39204447704833e-05, + "learning_rate": 3.595927866972694e-06, + "loss": 2.115964889526367e-06, + "num_tokens": 245737.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 43 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 650.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 885.0, + "completions/max_terminated_length": 885.0, + "completions/mean_length": 650.25, + "completions/mean_terminated_length": 650.25, + "completions/min_length": 508.0, + "completions/min_terminated_length": 508.0, + "epoch": 0.44, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.16251257061958313, + "kl": 0.0001057987265085103, + "learning_rate": 3.516841607689501e-06, + "loss": 4.276633262634277e-06, + "num_tokens": 251618.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + 
"rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 44 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 692.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 719.0, + "completions/max_terminated_length": 719.0, + "completions/mean_length": 692.75, + "completions/mean_terminated_length": 692.75, + "completions/min_length": 666.0, + "completions/min_terminated_length": 666.0, + "epoch": 0.45, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15349064767360687, + "kl": 7.098338755895384e-05, + "learning_rate": 3.436516483539781e-06, + "loss": 2.853572368621826e-06, + "num_tokens": 257493.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 45 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 576.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 678.0, + "completions/max_terminated_length": 678.0, + "completions/mean_length": 576.5, + "completions/mean_terminated_length": 576.5, + "completions/min_length": 451.0, + "completions/min_terminated_length": 451.0, + "epoch": 0.46, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.16658417880535126, + "kl": 
0.00024796422985673416, + "learning_rate": 3.3550503583141726e-06, + "loss": 9.894371032714844e-06, + "num_tokens": 262907.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 46 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 679.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1068.0, + "completions/max_terminated_length": 1068.0, + "completions/mean_length": 679.75, + "completions/mean_terminated_length": 679.75, + "completions/min_length": 452.0, + "completions/min_terminated_length": 452.0, + "epoch": 0.47, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17304089665412903, + "kl": 0.00017170603496197145, + "learning_rate": 3.272542485937369e-06, + "loss": 6.839632987976074e-06, + "num_tokens": 268906.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 47 + }, + { + "clip_ratio/high_max": 0.0, + 
"clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 637.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 698.0, + "completions/max_terminated_length": 698.0, + "completions/mean_length": 637.25, + "completions/mean_terminated_length": 637.25, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.48, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00041183308348990977, + "kl": 0.00011487708798085805, + "learning_rate": 3.189093389542498e-06, + "loss": 4.5950837375130504e-06, + "num_tokens": 274563.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 48 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 802.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1189.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 802.25, + "completions/mean_terminated_length": 802.25, + "completions/min_length": 501.0, + "completions/min_terminated_length": 501.0, + "epoch": 0.49, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17298302054405212, + "kl": 0.00021534036750381347, + "learning_rate": 3.1048047389991693e-06, + "loss": 8.627772331237793e-06, + "num_tokens": 281048.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + 
"rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 49 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 676.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 725.0, + "completions/max_terminated_length": 725.0, + "completions/mean_length": 676.0, + "completions/mean_terminated_length": 676.0, + "completions/min_length": 631.0, + "completions/min_terminated_length": 631.0, + "epoch": 0.5, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13108237087726593, + "kl": 0.00016818426684039878, + "learning_rate": 3.019779227044398e-06, + "loss": 6.7427754402160645e-06, + "num_tokens": 287032.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 50 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 636.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 682.0, + "completions/max_terminated_length": 682.0, + 
"completions/mean_length": 636.25, + "completions/mean_terminated_length": 636.25, + "completions/min_length": 609.0, + "completions/min_terminated_length": 609.0, + "epoch": 0.51, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0004512854793574661, + "kl": 0.00010897797619691119, + "learning_rate": 2.9341204441673267e-06, + "loss": 4.359118975116871e-06, + "num_tokens": 292677.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 51 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 701.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 836.0, + "completions/max_terminated_length": 836.0, + "completions/mean_length": 701.75, + "completions/mean_terminated_length": 701.75, + "completions/min_length": 576.0, + "completions/min_terminated_length": 576.0, + "epoch": 0.52, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12227238714694977, + "kl": 0.00010721037870098371, + "learning_rate": 2.847932752400164e-06, + "loss": 4.291534423828125e-06, + "num_tokens": 298760.0, + "reward": 0.15250001847743988, + "reward_std": 0.7683477997779846, + "rewards/reward_environment_execution/mean": -0.09750000387430191, + "rewards/reward_environment_execution/std": 0.2683747410774231, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + 
"rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 52 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 628.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 686.0, + "completions/max_terminated_length": 686.0, + "completions/mean_length": 628.25, + "completions/mean_terminated_length": 628.25, + "completions/min_length": 586.0, + "completions/min_terminated_length": 586.0, + "epoch": 0.53, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1427244395017624, + "kl": 0.00010186010331381112, + "learning_rate": 2.761321158169134e-06, + "loss": 4.112720489501953e-06, + "num_tokens": 304381.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 53 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 759.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 891.0, + "completions/max_terminated_length": 891.0, + "completions/mean_length": 759.25, + "completions/mean_terminated_length": 759.25, + "completions/min_length": 688.0, + "completions/min_terminated_length": 688.0, + "epoch": 0.54, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11026006191968918, + "kl": 
0.00010889488567045191, + "learning_rate": 2.6743911843603134e-06, + "loss": 4.366040229797363e-06, + "num_tokens": 310514.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 54 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 503.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 611.0, + "completions/max_terminated_length": 611.0, + "completions/mean_length": 503.5, + "completions/mean_terminated_length": 503.5, + "completions/min_length": 390.0, + "completions/min_terminated_length": 390.0, + "epoch": 0.55, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14209793508052826, + "kl": 0.000171166546351742, + "learning_rate": 2.587248741756253e-06, + "loss": 6.854534149169922e-06, + "num_tokens": 315808.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 55 + }, + { + "clip_ratio/high_max": 0.0, + 
"clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 627.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 742.0, + "completions/max_terminated_length": 742.0, + "completions/mean_length": 627.0, + "completions/mean_terminated_length": 627.0, + "completions/min_length": 511.0, + "completions/min_terminated_length": 511.0, + "epoch": 0.56, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14504601061344147, + "kl": 0.0003341738956805784, + "learning_rate": 2.5e-06, + "loss": 1.3366341590881348e-05, + "num_tokens": 321424.0, + "reward": -0.23249998688697815, + "reward_std": 0.886242151260376, + "rewards/reward_environment_execution/mean": -0.23250000178813934, + "rewards/reward_environment_execution/std": 0.3089093565940857, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 56 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 615.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 628.0, + "completions/max_terminated_length": 628.0, + "completions/mean_length": 615.5, + "completions/mean_terminated_length": 615.5, + "completions/min_length": 602.0, + "completions/min_terminated_length": 602.0, + "epoch": 0.57, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14204947650432587, + "kl": 0.00015341720063588582, + "learning_rate": 2.4127512582437486e-06, + "loss": 6.288290023803711e-06, + "num_tokens": 327142.0, + "reward": 0.6147500276565552, + "reward_std": 0.14949996769428253, + 
"rewards/reward_environment_execution/mean": 0.16474999487400055, + "rewards/reward_environment_execution/std": 0.24950000643730164, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.25, + "rewards/reward_investigation_quality/std": 0.10000000894069672, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 57 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 660.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 896.0, + "completions/max_terminated_length": 896.0, + "completions/mean_length": 660.0, + "completions/mean_terminated_length": 660.0, + "completions/min_length": 447.0, + "completions/min_terminated_length": 447.0, + "epoch": 0.58, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0004325012268964201, + "kl": 0.0001194007618323667, + "learning_rate": 2.325608815639687e-06, + "loss": 4.77603043691488e-06, + "num_tokens": 333058.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 58 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 708.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 816.0, + "completions/max_terminated_length": 816.0, + "completions/mean_length": 708.5, + 
"completions/mean_terminated_length": 708.5, + "completions/min_length": 532.0, + "completions/min_terminated_length": 532.0, + "epoch": 0.59, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11153003573417664, + "kl": 0.0001295358106290223, + "learning_rate": 2.238678841830867e-06, + "loss": 5.230307579040527e-06, + "num_tokens": 339156.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 59 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 677.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 804.0, + "completions/max_terminated_length": 804.0, + "completions/mean_length": 677.5, + "completions/mean_terminated_length": 677.5, + "completions/min_length": 603.0, + "completions/min_terminated_length": 603.0, + "epoch": 0.6, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11006420850753784, + "kl": 0.00012863795018347446, + "learning_rate": 2.1520672475998374e-06, + "loss": 5.163252353668213e-06, + "num_tokens": 344974.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + 
"rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 60 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 663.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 794.0, + "completions/max_terminated_length": 794.0, + "completions/mean_length": 663.0, + "completions/mean_terminated_length": 663.0, + "completions/min_length": 600.0, + "completions/min_terminated_length": 600.0, + "epoch": 0.61, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14184613525867462, + "kl": 0.00038980014687695075, + "learning_rate": 2.0658795558326745e-06, + "loss": 1.55717134475708e-05, + "num_tokens": 350734.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 61 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 660.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 827.0, + "completions/max_terminated_length": 827.0, + "completions/mean_length": 660.75, + "completions/mean_terminated_length": 660.75, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.62, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00037498201709240675, + "kl": 
0.000125649748952128, + "learning_rate": 1.9802207729556023e-06, + "loss": 5.025989594287239e-06, + "num_tokens": 356657.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 62 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 476.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 645.0, + "completions/max_terminated_length": 645.0, + "completions/mean_length": 476.0, + "completions/mean_terminated_length": 476.0, + "completions/min_length": 255.0, + "completions/min_terminated_length": 255.0, + "epoch": 0.63, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.20083816349506378, + "kl": 0.0002776872024696786, + "learning_rate": 1.895195261000831e-06, + "loss": 1.1064112186431885e-05, + "num_tokens": 361677.0, + "reward": 0.14749999344348907, + "reward_std": 0.7651306986808777, + "rewards/reward_environment_execution/mean": -0.10250000655651093, + "rewards/reward_environment_execution/std": 0.26537710428237915, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 63 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + 
"clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 634.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 855.0, + "completions/max_terminated_length": 855.0, + "completions/mean_length": 634.25, + "completions/mean_terminated_length": 634.25, + "completions/min_length": 506.0, + "completions/min_terminated_length": 506.0, + "epoch": 0.64, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12447395920753479, + "kl": 0.00040143656588043086, + "learning_rate": 1.8109066104575023e-06, + "loss": 1.6085803508758545e-05, + "num_tokens": 367322.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 64 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 641.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 835.0, + "completions/max_terminated_length": 835.0, + "completions/mean_length": 641.75, + "completions/mean_terminated_length": 641.75, + "completions/min_length": 520.0, + "completions/min_terminated_length": 520.0, + "epoch": 0.65, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.19145435094833374, + "kl": 0.00024168194067897275, + "learning_rate": 1.7274575140626318e-06, + "loss": 9.670853614807129e-06, + "num_tokens": 372989.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + 
"rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 65 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 433.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 478.0, + "completions/max_terminated_length": 478.0, + "completions/mean_length": 433.5, + "completions/mean_terminated_length": 433.5, + "completions/min_length": 400.0, + "completions/min_terminated_length": 400.0, + "epoch": 0.66, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1740998774766922, + "kl": 0.0004632533964468166, + "learning_rate": 1.6449496416858285e-06, + "loss": 1.8537044525146484e-05, + "num_tokens": 377831.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 66 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 429.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 696.0, + "completions/max_terminated_length": 696.0, + 
"completions/mean_length": 429.5, + "completions/mean_terminated_length": 429.5, + "completions/min_length": 228.0, + "completions/min_terminated_length": 228.0, + "epoch": 0.67, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.229574516415596, + "kl": 0.0006340235704556108, + "learning_rate": 1.56348351646022e-06, + "loss": 2.5369226932525635e-05, + "num_tokens": 382649.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 67 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 655.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 870.0, + "completions/max_terminated_length": 870.0, + "completions/mean_length": 655.25, + "completions/mean_terminated_length": 655.25, + "completions/min_length": 544.0, + "completions/min_terminated_length": 544.0, + "epoch": 0.68, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15165656805038452, + "kl": 0.00021119948905834462, + "learning_rate": 1.4831583923105e-06, + "loss": 8.471310138702393e-06, + "num_tokens": 388546.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + 
"rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 68 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 611.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 764.0, + "completions/max_terminated_length": 764.0, + "completions/mean_length": 611.75, + "completions/mean_terminated_length": 611.75, + "completions/min_length": 539.0, + "completions/min_terminated_length": 539.0, + "epoch": 0.69, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1276257336139679, + "kl": 0.00015681719924032222, + "learning_rate": 1.4040721330273063e-06, + "loss": 6.2659382820129395e-06, + "num_tokens": 394097.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 69 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 742.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 948.0, + "completions/max_terminated_length": 948.0, + "completions/mean_length": 742.25, + "completions/mean_terminated_length": 742.25, + "completions/min_length": 557.0, + "completions/min_terminated_length": 557.0, + "epoch": 0.7, + 
"frac_reward_zero_std": 0.0, + "grad_norm": 0.10219409316778183, + "kl": 0.00011395674209779827, + "learning_rate": 1.3263210930352737e-06, + "loss": 4.589557647705078e-06, + "num_tokens": 400162.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 70 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 575.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 610.0, + "completions/max_terminated_length": 610.0, + "completions/mean_length": 575.75, + "completions/mean_terminated_length": 575.75, + "completions/min_length": 538.0, + "completions/min_terminated_length": 538.0, + "epoch": 0.71, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14183653891086578, + "kl": 0.00029851996441720985, + "learning_rate": 1.2500000000000007e-06, + "loss": 1.1943280696868896e-05, + "num_tokens": 405581.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 
0.0, + "step": 71 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 647.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 970.0, + "completions/max_terminated_length": 970.0, + "completions/mean_length": 647.75, + "completions/mean_terminated_length": 647.75, + "completions/min_length": 429.0, + "completions/min_terminated_length": 429.0, + "epoch": 0.72, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15262170135974884, + "kl": 0.0001618979367776774, + "learning_rate": 1.1752018394169882e-06, + "loss": 6.4820051193237305e-06, + "num_tokens": 411464.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 72 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 533.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 845.0, + "completions/max_terminated_length": 845.0, + "completions/mean_length": 533.5, + "completions/mean_terminated_length": 533.5, + "completions/min_length": 328.0, + "completions/min_terminated_length": 328.0, + "epoch": 0.73, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00044858516776002944, + "kl": 0.00018171430565416813, + "learning_rate": 1.1020177413231334e-06, + "loss": 7.26857251720503e-06, + "num_tokens": 416698.0, + "reward": 
0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 73 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 492.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 612.0, + "completions/max_terminated_length": 612.0, + "completions/mean_length": 492.0, + "completions/mean_terminated_length": 492.0, + "completions/min_length": 387.0, + "completions/min_terminated_length": 387.0, + "epoch": 0.74, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0007429426186718047, + "kl": 0.00022592291861656122, + "learning_rate": 1.0305368692688175e-06, + "loss": 9.036917617777362e-06, + "num_tokens": 421782.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 74 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 735.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 813.0, + "completions/max_terminated_length": 813.0, + 
"completions/mean_length": 735.0, + "completions/mean_terminated_length": 735.0, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.75, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12998387217521667, + "kl": 0.0001630033293622546, + "learning_rate": 9.608463116858544e-07, + "loss": 6.616115570068359e-06, + "num_tokens": 427826.0, + "reward": 0.6147500276565552, + "reward_std": 0.14949996769428253, + "rewards/reward_environment_execution/mean": 0.16474999487400055, + "rewards/reward_environment_execution/std": 0.24950000643730164, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.25, + "rewards/reward_investigation_quality/std": 0.10000000894069672, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 75 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 579.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 649.0, + "completions/max_terminated_length": 649.0, + "completions/mean_length": 579.75, + "completions/mean_terminated_length": 579.75, + "completions/min_length": 544.0, + "completions/min_terminated_length": 544.0, + "epoch": 0.76, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17208269238471985, + "kl": 0.0001971699603018351, + "learning_rate": 8.930309757836517e-07, + "loss": 7.897615432739258e-06, + "num_tokens": 433417.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, 
+ "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 76 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 747.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 831.0, + "completions/max_terminated_length": 831.0, + "completions/mean_length": 747.25, + "completions/mean_terminated_length": 747.25, + "completions/min_length": 629.0, + "completions/min_terminated_length": 629.0, + "epoch": 0.77, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13295036554336548, + "kl": 0.0001527113163319882, + "learning_rate": 8.271734841028553e-07, + "loss": 6.094574928283691e-06, + "num_tokens": 439678.0, + "reward": 0.5375000238418579, + "reward_std": 0.004999995231628418, + "rewards/reward_environment_execution/mean": 0.03750000149011612, + "rewards/reward_environment_execution/std": 0.004999999888241291, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 77 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 634.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 761.0, + "completions/max_terminated_length": 761.0, + "completions/mean_length": 634.0, + "completions/mean_terminated_length": 634.0, + "completions/min_length": 524.0, + "completions/min_terminated_length": 524.0, + "epoch": 0.78, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0005127466865815222, + "kl": 0.00016469565161969513, + 
"learning_rate": 7.633540738525066e-07, + "loss": 6.5878257373697124e-06, + "num_tokens": 445314.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 78 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 631.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 725.0, + "completions/max_terminated_length": 725.0, + "completions/mean_length": 631.75, + "completions/mean_terminated_length": 631.75, + "completions/min_length": 588.0, + "completions/min_terminated_length": 588.0, + "epoch": 0.79, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00041453714948147535, + "kl": 0.00014827852828602772, + "learning_rate": 7.016504991533727e-07, + "loss": 5.931141004111851e-06, + "num_tokens": 450941.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 79 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 711.25, + 
"completions/clipped_ratio": 0.0, + "completions/max_length": 767.0, + "completions/max_terminated_length": 767.0, + "completions/mean_length": 711.25, + "completions/mean_terminated_length": 711.25, + "completions/min_length": 676.0, + "completions/min_terminated_length": 676.0, + "epoch": 0.8, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12832164764404297, + "kl": 0.0001775659984559752, + "learning_rate": 6.421379363065142e-07, + "loss": 7.115304470062256e-06, + "num_tokens": 456878.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 80 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 549.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 698.0, + "completions/max_terminated_length": 698.0, + "completions/mean_length": 549.5, + "completions/mean_terminated_length": 549.5, + "completions/min_length": 441.0, + "completions/min_terminated_length": 441.0, + "epoch": 0.81, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0003033373213838786, + "kl": 9.891987610899378e-05, + "learning_rate": 5.848888922025553e-07, + "loss": 3.956794898840599e-06, + "num_tokens": 462188.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + 
"rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 81 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 640.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 777.0, + "completions/max_terminated_length": 777.0, + "completions/mean_length": 640.5, + "completions/mean_terminated_length": 640.5, + "completions/min_length": 441.0, + "completions/min_terminated_length": 441.0, + "epoch": 0.82, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0004360841994639486, + "kl": 0.00013910212692280766, + "learning_rate": 5.299731159831953e-07, + "loss": 5.564085313380929e-06, + "num_tokens": 467846.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 82 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 496.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 648.0, + "completions/max_terminated_length": 648.0, + "completions/mean_length": 496.25, + "completions/mean_terminated_length": 496.25, + "completions/min_length": 156.0, + "completions/min_terminated_length": 156.0, + "epoch": 0.83, + "frac_reward_zero_std": 0.0, + "grad_norm": 
0.20748330652713776, + "kl": 0.0010627726915117819, + "learning_rate": 4.774575140626317e-07, + "loss": 4.252791404724121e-05, + "num_tokens": 472947.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 83 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 695.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 754.0, + "completions/max_terminated_length": 754.0, + "completions/mean_length": 695.25, + "completions/mean_terminated_length": 695.25, + "completions/min_length": 604.0, + "completions/min_terminated_length": 604.0, + "epoch": 0.84, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00041813356801867485, + "kl": 0.00015819387954252306, + "learning_rate": 4.27406068612396e-07, + "loss": 6.327755272650393e-06, + "num_tokens": 478828.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 84 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + 
"clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 705.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 862.0, + "completions/max_terminated_length": 862.0, + "completions/mean_length": 705.25, + "completions/mean_terminated_length": 705.25, + "completions/min_length": 517.0, + "completions/min_terminated_length": 517.0, + "epoch": 0.85, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1339874416589737, + "kl": 0.0003547336091287434, + "learning_rate": 3.798797596089351e-07, + "loss": 1.4185905456542969e-05, + "num_tokens": 484917.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 85 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 641.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 835.0, + "completions/max_terminated_length": 835.0, + "completions/mean_length": 641.5, + "completions/mean_terminated_length": 641.5, + "completions/min_length": 526.0, + "completions/min_terminated_length": 526.0, + "epoch": 0.86, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13057945668697357, + "kl": 0.00016476271775900386, + "learning_rate": 3.3493649053890325e-07, + "loss": 6.616115570068359e-06, + "num_tokens": 490747.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": 
-0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 86 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 874.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1365.0, + "completions/max_terminated_length": 1365.0, + "completions/mean_length": 874.5, + "completions/mean_terminated_length": 874.5, + "completions/min_length": 542.0, + "completions/min_terminated_length": 542.0, + "epoch": 0.87, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0004197706875856966, + "kl": 0.00016961217625066638, + "learning_rate": 2.9263101785268253e-07, + "loss": 6.784487140976125e-06, + "num_tokens": 497353.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 87 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 645.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 805.0, + "completions/max_terminated_length": 805.0, + "completions/mean_length": 645.0, + 
"completions/mean_terminated_length": 645.0, + "completions/min_length": 420.0, + "completions/min_terminated_length": 420.0, + "epoch": 0.88, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14476190507411957, + "kl": 0.00023185212194221094, + "learning_rate": 2.53014884252083e-07, + "loss": 9.28342342376709e-06, + "num_tokens": 503033.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 88 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 588.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 768.0, + "completions/max_terminated_length": 768.0, + "completions/mean_length": 588.75, + "completions/mean_terminated_length": 588.75, + "completions/min_length": 496.0, + "completions/min_terminated_length": 496.0, + "epoch": 0.89, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1939331591129303, + "kl": 0.00025866929718176834, + "learning_rate": 2.1613635589349756e-07, + "loss": 1.0281801223754883e-05, + "num_tokens": 508644.0, + "reward": 0.6150000095367432, + "reward_std": 0.15000000596046448, + "rewards/reward_environment_execution/mean": 0.06499999761581421, + "rewards/reward_environment_execution/std": 0.05000000074505806, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.2750000059604645, + 
"rewards/reward_investigation_quality/std": 0.05000000447034836, + "rewards/reward_os_mechanics/mean": 0.07500000298023224, + "rewards/reward_os_mechanics/std": 0.15000002086162567, + "step": 89 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 662.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 867.0, + "completions/max_terminated_length": 867.0, + "completions/mean_length": 662.5, + "completions/mean_terminated_length": 662.5, + "completions/min_length": 462.0, + "completions/min_terminated_length": 462.0, + "epoch": 0.9, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0004947524867020547, + "kl": 0.00020826212130486965, + "learning_rate": 1.8204036358303173e-07, + "loss": 8.330483979079872e-06, + "num_tokens": 514402.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 90 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 602.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 697.0, + "completions/max_terminated_length": 697.0, + "completions/mean_length": 602.5, + "completions/mean_terminated_length": 602.5, + "completions/min_length": 545.0, + "completions/min_terminated_length": 545.0, + "epoch": 0.91, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0005595118273049593, + "kl": 0.0001856963317550253, + "learning_rate": 
1.507684480352292e-07, + "loss": 7.427853233821224e-06, + "num_tokens": 519900.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 91 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 599.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 741.0, + "completions/max_terminated_length": 741.0, + "completions/mean_length": 599.25, + "completions/mean_terminated_length": 599.25, + "completions/min_length": 396.0, + "completions/min_terminated_length": 396.0, + "epoch": 0.92, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.16615043580532074, + "kl": 0.00019787985365837812, + "learning_rate": 1.223587092621162e-07, + "loss": 7.9423189163208e-06, + "num_tokens": 525405.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 92 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + 
"completion_length": 576.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 715.0, + "completions/max_terminated_length": 715.0, + "completions/mean_length": 576.5, + "completions/mean_terminated_length": 576.5, + "completions/min_length": 512.0, + "completions/min_terminated_length": 512.0, + "epoch": 0.93, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15260961651802063, + "kl": 0.00042037190542032477, + "learning_rate": 9.684576015420277e-08, + "loss": 1.683831214904785e-05, + "num_tokens": 530819.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 93 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 549.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 572.0, + "completions/max_terminated_length": 572.0, + "completions/mean_length": 549.5, + "completions/mean_terminated_length": 549.5, + "completions/min_length": 537.0, + "completions/min_terminated_length": 537.0, + "epoch": 0.94, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0005341690266504884, + "kl": 0.00022523655752593186, + "learning_rate": 7.426068431000883e-08, + "loss": 9.009461791720241e-06, + "num_tokens": 536133.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 
0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 94 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 639.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 800.0, + "completions/max_terminated_length": 800.0, + "completions/mean_length": 639.0, + "completions/mean_terminated_length": 639.0, + "completions/min_length": 452.0, + "completions/min_terminated_length": 452.0, + "epoch": 0.95, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1709386706352234, + "kl": 0.00015768476805533282, + "learning_rate": 5.463099816548578e-08, + "loss": 6.3478946685791016e-06, + "num_tokens": 541797.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 95 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 564.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 681.0, + "completions/max_terminated_length": 681.0, + "completions/mean_length": 564.5, + "completions/mean_terminated_length": 564.5, + "completions/min_length": 479.0, + 
"completions/min_terminated_length": 479.0, + "epoch": 0.96, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14956524968147278, + "kl": 0.0002595126898086164, + "learning_rate": 3.798061746947995e-08, + "loss": 1.0423362255096436e-05, + "num_tokens": 547315.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 96 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 689.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 762.0, + "completions/max_terminated_length": 762.0, + "completions/mean_length": 689.5, + "completions/mean_terminated_length": 689.5, + "completions/min_length": 639.0, + "completions/min_terminated_length": 639.0, + "epoch": 0.97, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1273757517337799, + "kl": 0.000450757157523185, + "learning_rate": 2.4329828146074096e-08, + "loss": 1.8030405044555664e-05, + "num_tokens": 553181.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + 
"rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 97 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 684.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 992.0, + "completions/max_terminated_length": 992.0, + "completions/mean_length": 684.5, + "completions/mean_terminated_length": 684.5, + "completions/min_length": 550.0, + "completions/min_terminated_length": 550.0, + "epoch": 0.98, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1548420637845993, + "kl": 0.00030209575379558373, + "learning_rate": 1.3695261579316776e-08, + "loss": 1.2077391147613525e-05, + "num_tokens": 559015.0, + "reward": 0.249750018119812, + "reward_std": 0.8521073460578918, + "rewards/reward_environment_execution/mean": 0.04975000396370888, + "rewards/reward_environment_execution/std": 0.4569685757160187, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.17500001192092896, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 98 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 571.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 686.0, + "completions/max_terminated_length": 686.0, + "completions/mean_length": 571.75, + "completions/mean_terminated_length": 571.75, + "completions/min_length": 478.0, + "completions/min_terminated_length": 478.0, + "epoch": 0.99, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15080547332763672, + "kl": 0.0005083957512397319, + "learning_rate": 6.089874350439507e-09, + "loss": 
2.034008502960205e-05, + "num_tokens": 564590.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 99 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 677.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 774.0, + "completions/max_terminated_length": 774.0, + "completions/mean_length": 677.75, + "completions/mean_terminated_length": 677.75, + "completions/min_length": 558.0, + "completions/min_terminated_length": 558.0, + "epoch": 1.0, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00034051548573188484, + "kl": 0.00015363937745860312, + "learning_rate": 1.5229324522605949e-09, + "loss": 6.145574843685608e-06, + "num_tokens": 570589.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 100 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 570589, + "num_train_epochs": 1, + "save_steps": 25, + "stateful_callbacks": { + "TrainerControl": { + "args": 
{ + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a152a78f906cd195d8d8231bc4b418c49eedd15 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9107f9f9e124fcdfbf6ef3ddbf42619d99c323466de7a60f15005895e3f3aa4b +size 6673 diff --git a/checkpoint-25/README.md b/checkpoint-25/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c1faba2efaacd24a9c0a637ded9575527657860d --- /dev/null +++ b/checkpoint-25/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +- grpo +- lora +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + 
+[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-25/adapter_config.json b/checkpoint-25/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0578d0149020358a61e1ca953a43cbd73121376f --- /dev/null +++ b/checkpoint-25/adapter_config.json @@ -0,0 +1,50 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "LlamaForCausalLM", + "parent_library": "transformers.models.llama.modeling_llama", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", 
+ "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "v_proj", + "up_proj", + "k_proj", + "down_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-25/adapter_model.safetensors b/checkpoint-25/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2a089c4a2c110e5721daa561a9304a810baf608 --- /dev/null +++ b/checkpoint-25/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ed8ef1a323ba8ad224f88bad378a1738c45bac74e1991a8c6d343063afe2eb +size 167832240 diff --git a/checkpoint-25/chat_template.jinja b/checkpoint-25/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-25/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. 
#} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-25/optimizer.pt b/checkpoint-25/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..45354b3318c2d5f9a58852b0133cbddfc255b1ba --- /dev/null +++ b/checkpoint-25/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7616895c1ca4efcd02c5f18d8e8944d64131256cc611b44b9a537119f1947a5 +size 85728229 diff --git a/checkpoint-25/rng_state.pth b/checkpoint-25/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..26f470af519f670b9197833f27a2cd6788b0bf6c --- /dev/null +++ b/checkpoint-25/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be23aa74b08f1d344a095d0c549f77cee1920ba0a7dbaa9bc9e5fb894d933d03 +size 14645 diff --git a/checkpoint-25/scaler.pt b/checkpoint-25/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc42e000b34d20a85096aff4223e34adc1c77dfd --- /dev/null +++ b/checkpoint-25/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:652500b9116ca5d48648b8fc119f3c03e149cea9780a405ddba77469ca31da55 +size 1383 diff --git a/checkpoint-25/scheduler.pt b/checkpoint-25/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..52054dc718c2901ea159d6e2635158507aac7d12 --- /dev/null +++ b/checkpoint-25/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1248490cb066b0cf09b421632ab18e3be38835c093500080485a11183c593dac +size 1465 diff --git a/checkpoint-25/tokenizer.json b/checkpoint-25/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5b9e375b2b4e8c4210d9d8a8a8d0642d1f715076 --- /dev/null +++ b/checkpoint-25/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65c6c5f9764771aa485e6a1f5e63d7d9af8477fe0777148c17476ecb2e09a05 +size 17210099 diff --git a/checkpoint-25/tokenizer_config.json b/checkpoint-25/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a55a3083a0efb3c15b58aa6c3517ac5dfc6d1ca --- /dev/null +++ 
b/checkpoint-25/tokenizer_config.json @@ -0,0 +1,2068 @@ +{ + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "from_slow": true, + "is_local": false, + "legacy": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "TokenizersBackend", + "unk_token": null, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128033": { + "content": 
"<|reserved_special_token_25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + 
"128045": { + "content": "<|reserved_special_token_37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128116": { + "content": 
"<|reserved_special_token_108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128128": { + "content": "<|reserved_special_token_120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128175": { + "content": 
"<|reserved_special_token_167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128187": { + "content": "<|reserved_special_token_179|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128234": { + "content": 
"<|reserved_special_token_226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128246": { + "content": "<|reserved_special_token_238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + } +} diff --git a/checkpoint-25/trainer_state.json b/checkpoint-25/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c33fe18c67997ffbb905243439bc721952e52afd --- /dev/null +++ b/checkpoint-25/trainer_state.json @@ -0,0 +1,859 @@ +{ + "best_global_step": null, + 
"best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.25, + "eval_steps": 500, + "global_step": 25, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 659.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 821.0, + "completions/max_terminated_length": 821.0, + "completions/mean_length": 659.0, + "completions/mean_terminated_length": 659.0, + "completions/min_length": 516.0, + "completions/min_terminated_length": 516.0, + "epoch": 0.01, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13492469489574432, + "kl": 6.197717993927654e-06, + "learning_rate": 0.0, + "loss": 2.8312206268310547e-07, + "num_tokens": 5736.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 1 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 543.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 693.0, + "completions/max_terminated_length": 693.0, + "completions/mean_length": 543.75, + "completions/mean_terminated_length": 543.75, + "completions/min_length": 426.0, + "completions/min_terminated_length": 426.0, + "epoch": 0.02, + "frac_reward_zero_std": 0.0, + "grad_norm": 
0.1295754313468933, + "kl": 7.463787596861948e-06, + "learning_rate": 5.000000000000001e-07, + "loss": 3.725290298461914e-07, + "num_tokens": 11019.0, + "reward": 0.15250001847743988, + "reward_std": 0.7683477997779846, + "rewards/reward_environment_execution/mean": -0.09749999642372131, + "rewards/reward_environment_execution/std": 0.2683747410774231, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 2 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 572.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 677.0, + "completions/max_terminated_length": 677.0, + "completions/mean_length": 572.75, + "completions/mean_terminated_length": 572.75, + "completions/min_length": 490.0, + "completions/min_terminated_length": 490.0, + "epoch": 0.03, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1402619630098343, + "kl": 5.991519174131099e-06, + "learning_rate": 1.0000000000000002e-06, + "loss": 2.2351741790771484e-07, + "num_tokens": 16418.0, + "reward": 0.5375000238418579, + "reward_std": 0.004999995231628418, + "rewards/reward_environment_execution/mean": 0.03750000149011612, + "rewards/reward_environment_execution/std": 0.004999999888241291, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 3 + }, + { + "clip_ratio/high_max": 0.0, + 
"clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 705.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 783.0, + "completions/max_terminated_length": 783.0, + "completions/mean_length": 705.0, + "completions/mean_terminated_length": 705.0, + "completions/min_length": 634.0, + "completions/min_terminated_length": 634.0, + "epoch": 0.04, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13879841566085815, + "kl": 6.360480483635911e-06, + "learning_rate": 1.5e-06, + "loss": 2.980232238769531e-07, + "num_tokens": 22514.0, + "reward": 0.24392502009868622, + "reward_std": 0.8475051522254944, + "rewards/reward_environment_execution/mean": 0.04392500966787338, + "rewards/reward_environment_execution/std": 0.4515362083911896, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.17500001192092896, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 4 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 677.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 785.0, + "completions/max_terminated_length": 785.0, + "completions/mean_length": 677.0, + "completions/mean_terminated_length": 677.0, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.05, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.10656019300222397, + "kl": 4.833805178350303e-06, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.2351741790771484e-07, + "num_tokens": 28326.0, + "reward": 0.15250001847743988, + "reward_std": 0.7683477997779846, + "rewards/reward_environment_execution/mean": 
-0.09750000387430191, + "rewards/reward_environment_execution/std": 0.2683747410774231, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 5 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 701.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 913.0, + "completions/max_terminated_length": 913.0, + "completions/mean_length": 701.0, + "completions/mean_terminated_length": 701.0, + "completions/min_length": 553.0, + "completions/min_terminated_length": 553.0, + "epoch": 0.06, + "frac_reward_zero_std": 1.0, + "grad_norm": 6.133589340606704e-05, + "kl": 6.346002919599414e-06, + "learning_rate": 2.5e-06, + "loss": 2.5384014179508085e-07, + "num_tokens": 34230.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": -0.5, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": -0.5, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.0, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 6 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 524.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 640.0, + "completions/max_terminated_length": 640.0, + "completions/mean_length": 524.0, + "completions/mean_terminated_length": 524.0, + "completions/min_length": 450.0, + 
"completions/min_terminated_length": 450.0, + "epoch": 0.07, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13970911502838135, + "kl": 6.830848860772676e-06, + "learning_rate": 3e-06, + "loss": 2.8312206268310547e-07, + "num_tokens": 39426.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 7 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 626.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 725.0, + "completions/max_terminated_length": 725.0, + "completions/mean_length": 626.0, + "completions/mean_terminated_length": 626.0, + "completions/min_length": 537.0, + "completions/min_terminated_length": 537.0, + "epoch": 0.08, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12764990329742432, + "kl": 5.7269965054729255e-06, + "learning_rate": 3.5e-06, + "loss": 2.1606683731079102e-07, + "num_tokens": 45026.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + 
"rewards/reward_os_mechanics/std": 0.0, + "step": 8 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 669.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 752.0, + "completions/max_terminated_length": 752.0, + "completions/mean_length": 669.5, + "completions/mean_terminated_length": 669.5, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.09, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1321861445903778, + "kl": 5.624260097647493e-06, + "learning_rate": 4.000000000000001e-06, + "loss": 2.086162567138672e-07, + "num_tokens": 50812.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 9 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 640.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 790.0, + "completions/max_terminated_length": 790.0, + "completions/mean_length": 640.25, + "completions/mean_terminated_length": 640.25, + "completions/min_length": 471.0, + "completions/min_terminated_length": 471.0, + "epoch": 0.1, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12317467480897903, + "kl": 6.860862526991696e-06, + "learning_rate": 4.5e-06, + "loss": 3.0547380447387695e-07, + "num_tokens": 56485.0, + "reward": 
0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 664.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 855.0, + "completions/max_terminated_length": 855.0, + "completions/mean_length": 664.75, + "completions/mean_terminated_length": 664.75, + "completions/min_length": 559.0, + "completions/min_terminated_length": 559.0, + "epoch": 0.11, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1155354306101799, + "kl": 6.481316631834488e-06, + "learning_rate": 5e-06, + "loss": 2.682209014892578e-07, + "num_tokens": 62252.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 11 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 699.5, + 
"completions/clipped_ratio": 0.0, + "completions/max_length": 880.0, + "completions/max_terminated_length": 880.0, + "completions/mean_length": 699.5, + "completions/mean_terminated_length": 699.5, + "completions/min_length": 539.0, + "completions/min_terminated_length": 539.0, + "epoch": 0.12, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11765824258327484, + "kl": 5.136041295372706e-06, + "learning_rate": 4.99847706754774e-06, + "loss": 1.862645149230957e-07, + "num_tokens": 68150.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 12 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 753.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 913.0, + "completions/max_terminated_length": 913.0, + "completions/mean_length": 753.5, + "completions/mean_terminated_length": 753.5, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.13, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1163109764456749, + "kl": 6.4769044456625124e-06, + "learning_rate": 4.993910125649561e-06, + "loss": 2.682209014892578e-07, + "num_tokens": 74436.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 
0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 13 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 691.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 870.0, + "completions/max_terminated_length": 870.0, + "completions/mean_length": 691.25, + "completions/mean_terminated_length": 691.25, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.14, + "frac_reward_zero_std": 1.0, + "grad_norm": 5.699428584193811e-05, + "kl": 4.6748977524657676e-06, + "learning_rate": 4.986304738420684e-06, + "loss": 1.869958907718683e-07, + "num_tokens": 80465.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 14 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 588.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 832.0, + "completions/max_terminated_length": 832.0, + "completions/mean_length": 588.0, + "completions/mean_terminated_length": 588.0, + "completions/min_length": 432.0, + "completions/min_terminated_length": 432.0, + "epoch": 0.15, 
+ "frac_reward_zero_std": 1.0, + "grad_norm": 0.00010395531717222184, + "kl": 9.099214707930514e-06, + "learning_rate": 4.975670171853926e-06, + "loss": 3.639685814960103e-07, + "num_tokens": 86089.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": -0.5, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": -0.5, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.0, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 15 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 607.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 673.0, + "completions/max_terminated_length": 673.0, + "completions/mean_length": 607.75, + "completions/mean_terminated_length": 607.75, + "completions/min_length": 517.0, + "completions/min_terminated_length": 517.0, + "epoch": 0.16, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1489061564207077, + "kl": 9.158140301224194e-06, + "learning_rate": 4.962019382530521e-06, + "loss": 3.2782554626464844e-07, + "num_tokens": 91620.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 16 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + 
"clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 657.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 737.0, + "completions/max_terminated_length": 737.0, + "completions/mean_length": 657.25, + "completions/mean_terminated_length": 657.25, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.17, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14814995229244232, + "kl": 1.14353706521797e-05, + "learning_rate": 4.9453690018345144e-06, + "loss": 4.470348358154297e-07, + "num_tokens": 97513.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 17 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 549.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 621.0, + "completions/max_terminated_length": 621.0, + "completions/mean_length": 549.25, + "completions/mean_terminated_length": 549.25, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.18, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13028942048549652, + "kl": 1.1230136237827537e-05, + "learning_rate": 4.925739315689991e-06, + "loss": 4.6193599700927734e-07, + "num_tokens": 102810.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + 
"rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 18 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 565.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 686.0, + "completions/max_terminated_length": 686.0, + "completions/mean_length": 565.5, + "completions/mean_terminated_length": 565.5, + "completions/min_length": 288.0, + "completions/min_terminated_length": 288.0, + "epoch": 0.19, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.2010871171951294, + "kl": 1.3259304523671744e-05, + "learning_rate": 4.903154239845798e-06, + "loss": 5.438923835754395e-07, + "num_tokens": 108344.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 19 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 692.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 777.0, + "completions/max_terminated_length": 777.0, + 
"completions/mean_length": 692.25, + "completions/mean_terminated_length": 692.25, + "completions/min_length": 633.0, + "completions/min_terminated_length": 633.0, + "epoch": 0.2, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12407222390174866, + "kl": 1.1413530501158675e-05, + "learning_rate": 4.8776412907378845e-06, + "loss": 4.6938657760620117e-07, + "num_tokens": 114393.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 20 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 633.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 702.0, + "completions/max_terminated_length": 702.0, + "completions/mean_length": 633.0, + "completions/mean_terminated_length": 633.0, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.21, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14354389905929565, + "kl": 1.5171975746852695e-05, + "learning_rate": 4.849231551964771e-06, + "loss": 6.407499313354492e-07, + "num_tokens": 120177.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + 
"rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 21 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 669.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 978.0, + "completions/max_terminated_length": 978.0, + "completions/mean_length": 669.0, + "completions/mean_terminated_length": 669.0, + "completions/min_length": 444.0, + "completions/min_terminated_length": 444.0, + "epoch": 0.22, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12197419255971909, + "kl": 1.5716058214820805e-05, + "learning_rate": 4.817959636416969e-06, + "loss": 6.183981895446777e-07, + "num_tokens": 126125.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 22 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 774.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1024.0, + "completions/max_terminated_length": 1024.0, + "completions/mean_length": 774.25, + "completions/mean_terminated_length": 774.25, + "completions/min_length": 484.0, + "completions/min_terminated_length": 484.0, + "epoch": 0.23, + 
"frac_reward_zero_std": 0.0, + "grad_norm": 0.1378677934408188, + "kl": 2.0351369357740623e-05, + "learning_rate": 4.783863644106502e-06, + "loss": 8.493661880493164e-07, + "num_tokens": 132326.0, + "reward": -0.21249999105930328, + "reward_std": 0.9097756743431091, + "rewards/reward_environment_execution/mean": -0.23750001192092896, + "rewards/reward_environment_execution/std": 0.30335623025894165, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.125, + "rewards/reward_investigation_quality/std": 0.15000000596046448, + "rewards/reward_os_mechanics/mean": 0.05000000074505806, + "rewards/reward_os_mechanics/std": 0.10000000149011612, + "step": 23 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 565.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 732.0, + "completions/max_terminated_length": 732.0, + "completions/mean_length": 565.75, + "completions/mean_terminated_length": 565.75, + "completions/min_length": 314.0, + "completions/min_terminated_length": 314.0, + "epoch": 0.24, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1561768352985382, + "kl": 2.7805408535641618e-05, + "learning_rate": 4.746985115747918e-06, + "loss": 1.1026859283447266e-06, + "num_tokens": 137685.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + 
"rewards/reward_os_mechanics/std": 0.0, + "step": 24 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 668.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 909.0, + "completions/max_terminated_length": 909.0, + "completions/mean_length": 668.75, + "completions/mean_terminated_length": 668.75, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.25, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13285094499588013, + "kl": 2.5312373509223107e-05, + "learning_rate": 4.707368982147318e-06, + "loss": 1.0132789611816406e-06, + "num_tokens": 143456.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 25 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 143456, + "num_train_epochs": 1, + "save_steps": 25, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-25/training_args.bin b/checkpoint-25/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a152a78f906cd195d8d8231bc4b418c49eedd15 --- /dev/null +++ 
b/checkpoint-25/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9107f9f9e124fcdfbf6ef3ddbf42619d99c323466de7a60f15005895e3f3aa4b +size 6673 diff --git a/checkpoint-4/README.md b/checkpoint-4/README.md new file mode 100644 index 0000000000000000000000000000000000000000..08f272c2f60c5f1cd52464233c85bed56dd1abf1 --- /dev/null +++ b/checkpoint-4/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/meta-llama-3.1-8b-instruct-unsloth-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/meta-llama-3.1-8b-instruct-unsloth-bnb-4bit +- grpo +- lora +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-4/adapter_config.json b/checkpoint-4/adapter_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..c93220cafe57218a504e6f0b28125ac3d0695872 --- /dev/null +++ b/checkpoint-4/adapter_config.json @@ -0,0 +1,50 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "LlamaForCausalLM", + "parent_library": "transformers.models.llama.modeling_llama", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/meta-llama-3.1-8b-instruct-unsloth-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "gate_proj", + "up_proj", + "o_proj", + "q_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-4/adapter_model.safetensors b/checkpoint-4/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41efeeef89c1522a1250cb0bb088557f8633e5db --- /dev/null +++ b/checkpoint-4/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ccc46cb515f92412672aba75ac0a3bfd48ab48e1873d9c3e8c26991f7cc765 +size 167832240 diff --git a/checkpoint-4/chat_template.jinja b/checkpoint-4/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- 
/dev/null +++ b/checkpoint-4/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} 
+ {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-4/optimizer.pt b/checkpoint-4/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7590bc38a36cd91cf92fa4d4d7e4a4aac10afbce --- /dev/null +++ b/checkpoint-4/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:322cd0049d0f1a8d65e865adbaa2cbba4ad442e146061dee35db6f7607f2e391 +size 85728229 diff --git a/checkpoint-4/rng_state.pth b/checkpoint-4/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c3d603fbc1189c8ab6c5db9e907b383ae0653aae --- /dev/null +++ b/checkpoint-4/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5106f548299f713bb6f4cd3e1da0ed472cab650679836a014bee56db296564dc +size 14645 diff --git a/checkpoint-4/scaler.pt b/checkpoint-4/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01894f3f2214aa15d33b287032a7ec4e17f958eb --- /dev/null +++ b/checkpoint-4/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b8cb72e5ae34a7b26df52ee7f8be75ae9d7cc7ef5c3607bb31fda7cac77234 +size 1383 diff --git a/checkpoint-4/scheduler.pt 
b/checkpoint-4/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3312482353167156ca36937f555462e0fab5b2a9 --- /dev/null +++ b/checkpoint-4/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85dc8501a5897aea18f711b9cc8fe9ae28adea7e1f9588f6f31c73e4e67c1707 +size 1465 diff --git a/checkpoint-4/tokenizer.json b/checkpoint-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5b9e375b2b4e8c4210d9d8a8a8d0642d1f715076 --- /dev/null +++ b/checkpoint-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65c6c5f9764771aa485e6a1f5e63d7d9af8477fe0777148c17476ecb2e09a05 +size 17210099 diff --git a/checkpoint-4/tokenizer_config.json b/checkpoint-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a55a3083a0efb3c15b58aa6c3517ac5dfc6d1ca --- /dev/null +++ b/checkpoint-4/tokenizer_config.json @@ -0,0 +1,2068 @@ +{ + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "from_slow": true, + "is_local": false, + "legacy": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "TokenizersBackend", + "unk_token": null, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128075": { + "content": 
"<|reserved_special_token_67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + 
"128087": { + "content": "<|reserved_special_token_79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128146": { + "content": 
"<|reserved_special_token_138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128158": { + "content": "<|reserved_special_token_150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128205": { + "content": 
"<|reserved_special_token_197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128217": { + "content": "<|reserved_special_token_209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + } +} diff --git a/checkpoint-4/trainer_state.json b/checkpoint-4/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6afbebc42d342b825a535c4285b9c0c594cc5f1d --- /dev/null +++ b/checkpoint-4/trainer_state.json @@ -0,0 +1,166 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 4, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 512.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 512.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 512.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 512.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.25, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.16221363842487335, + "kl": -3.4924596548080444e-10, + "learning_rate": 0.0, + "loss": 0.0, + "num_tokens": 2580.0, + "reward": -0.22999998927116394, + "reward_std": 1.088944435119629, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.3818376660346985, + 
"rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.4949747622013092, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.2121320366859436, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 1 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 512.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 512.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 512.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 512.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.5, + "frac_reward_zero_std": 1.0, + "grad_norm": 3.505946644111191e-09, + "kl": -4.656612873077393e-10, + "learning_rate": 5.000000000000001e-07, + "loss": -1.8626451075975936e-11, + "num_tokens": 5158.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 2 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 512.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 512.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 512.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 512.0, + 
"completions/min_terminated_length": 0.0, + "epoch": 0.75, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.20935378968715668, + "kl": 5.7238503359258175e-06, + "learning_rate": 1.0000000000000002e-06, + "loss": 2.384185791015625e-07, + "num_tokens": 7736.0, + "reward": -0.22999998927116394, + "reward_std": 1.088944435119629, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.3818376660346985, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.4949747622013092, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.2121320366859436, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 3 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 512.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 512.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 512.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 512.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.0, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.16330939531326294, + "kl": 5.532288923859596e-06, + "learning_rate": 1.5e-06, + "loss": 2.384185791015625e-07, + "num_tokens": 10392.0, + "reward": -0.22999998927116394, + "reward_std": 1.088944435119629, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.3818376660346985, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.4949747622013092, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.2121320366859436, + "rewards/reward_os_mechanics/mean": 0.0, + 
"rewards/reward_os_mechanics/std": 0.0, + "step": 4 + } + ], + "logging_steps": 1, + "max_steps": 4, + "num_input_tokens_seen": 10392, + "num_train_epochs": 1, + "save_steps": 999, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4/training_args.bin b/checkpoint-4/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9fa6d0ffaf3cfb1c87a525297c04982c1f6f489a --- /dev/null +++ b/checkpoint-4/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c21a72bceea315dccc789ec17ac53dffaf0215769b75e7096279d485f465266a +size 6673 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c1faba2efaacd24a9c0a637ded9575527657860d --- /dev/null +++ b/checkpoint-50/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +- grpo +- lora +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information 
Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0578d0149020358a61e1ca953a43cbd73121376f --- /dev/null +++ b/checkpoint-50/adapter_config.json @@ -0,0 +1,50 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "LlamaForCausalLM", + "parent_library": "transformers.models.llama.modeling_llama", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", 
+ "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "v_proj", + "up_proj", + "k_proj", + "down_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f564d54e98b2cac9a5225b80b3e949fa71a0576 --- /dev/null +++ b/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c5cf88af0d6cde22ec27529a71ba3c3ff52890a619e1d79859a20ed430bbacb +size 167832240 diff --git a/checkpoint-50/chat_template.jinja b/checkpoint-50/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-50/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. 
#} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..adfb72d6fa6f1836219b7c131cd5cd04701ab3df --- /dev/null +++ b/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53266dcd67aaa80f0a5f59529baa7eecfd6c226094fc065a9204e9bbd63b34b0 +size 85728229 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..71e3e81cefe9685dd298d4a853614b2960d2d008 --- /dev/null +++ b/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dee07ddf658d08f19be41a991352633e375a695f379989a1b0ca15bdf380d204 +size 14645 diff --git a/checkpoint-50/scaler.pt b/checkpoint-50/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..48ade11e4a25ce357344c0aca97f5e25df3bddc4 --- /dev/null +++ b/checkpoint-50/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd0e9d505fbc3f97feb166d29026132bdf14eb3e5c7ff77beebc303ee666f96 +size 1383 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..13133472e85954303661522ad1a5b2bc519aa06d --- /dev/null +++ b/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b580979cb89645c48c0ddb15e07af1b9cd52a573ab6ff2aac42e62cca7e28ff +size 1465 diff --git a/checkpoint-50/tokenizer.json b/checkpoint-50/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5b9e375b2b4e8c4210d9d8a8a8d0642d1f715076 --- /dev/null +++ b/checkpoint-50/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65c6c5f9764771aa485e6a1f5e63d7d9af8477fe0777148c17476ecb2e09a05 +size 17210099 diff --git a/checkpoint-50/tokenizer_config.json b/checkpoint-50/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a55a3083a0efb3c15b58aa6c3517ac5dfc6d1ca --- /dev/null +++ 
b/checkpoint-50/tokenizer_config.json @@ -0,0 +1,2068 @@ +{ + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "from_slow": true, + "is_local": false, + "legacy": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "TokenizersBackend", + "unk_token": null, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128033": { + "content": 
"<|reserved_special_token_25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + 
"128045": { + "content": "<|reserved_special_token_37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128116": { + "content": 
"<|reserved_special_token_108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128128": { + "content": "<|reserved_special_token_120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128175": { + "content": 
"<|reserved_special_token_167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128187": { + "content": "<|reserved_special_token_179|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128234": { + "content": 
"<|reserved_special_token_226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128246": { + "content": "<|reserved_special_token_238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + } +} diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1b192552c5eecfc729198e04e81833b323255379 --- /dev/null +++ b/checkpoint-50/trainer_state.json @@ -0,0 +1,1684 @@ +{ + "best_global_step": null, + 
"best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 659.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 821.0, + "completions/max_terminated_length": 821.0, + "completions/mean_length": 659.0, + "completions/mean_terminated_length": 659.0, + "completions/min_length": 516.0, + "completions/min_terminated_length": 516.0, + "epoch": 0.01, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13492469489574432, + "kl": 6.197717993927654e-06, + "learning_rate": 0.0, + "loss": 2.8312206268310547e-07, + "num_tokens": 5736.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 1 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 543.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 693.0, + "completions/max_terminated_length": 693.0, + "completions/mean_length": 543.75, + "completions/mean_terminated_length": 543.75, + "completions/min_length": 426.0, + "completions/min_terminated_length": 426.0, + "epoch": 0.02, + "frac_reward_zero_std": 0.0, + "grad_norm": 
0.1295754313468933, + "kl": 7.463787596861948e-06, + "learning_rate": 5.000000000000001e-07, + "loss": 3.725290298461914e-07, + "num_tokens": 11019.0, + "reward": 0.15250001847743988, + "reward_std": 0.7683477997779846, + "rewards/reward_environment_execution/mean": -0.09749999642372131, + "rewards/reward_environment_execution/std": 0.2683747410774231, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 2 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 572.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 677.0, + "completions/max_terminated_length": 677.0, + "completions/mean_length": 572.75, + "completions/mean_terminated_length": 572.75, + "completions/min_length": 490.0, + "completions/min_terminated_length": 490.0, + "epoch": 0.03, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1402619630098343, + "kl": 5.991519174131099e-06, + "learning_rate": 1.0000000000000002e-06, + "loss": 2.2351741790771484e-07, + "num_tokens": 16418.0, + "reward": 0.5375000238418579, + "reward_std": 0.004999995231628418, + "rewards/reward_environment_execution/mean": 0.03750000149011612, + "rewards/reward_environment_execution/std": 0.004999999888241291, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 3 + }, + { + "clip_ratio/high_max": 0.0, + 
"clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 705.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 783.0, + "completions/max_terminated_length": 783.0, + "completions/mean_length": 705.0, + "completions/mean_terminated_length": 705.0, + "completions/min_length": 634.0, + "completions/min_terminated_length": 634.0, + "epoch": 0.04, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13879841566085815, + "kl": 6.360480483635911e-06, + "learning_rate": 1.5e-06, + "loss": 2.980232238769531e-07, + "num_tokens": 22514.0, + "reward": 0.24392502009868622, + "reward_std": 0.8475051522254944, + "rewards/reward_environment_execution/mean": 0.04392500966787338, + "rewards/reward_environment_execution/std": 0.4515362083911896, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.17500001192092896, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 4 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 677.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 785.0, + "completions/max_terminated_length": 785.0, + "completions/mean_length": 677.0, + "completions/mean_terminated_length": 677.0, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.05, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.10656019300222397, + "kl": 4.833805178350303e-06, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.2351741790771484e-07, + "num_tokens": 28326.0, + "reward": 0.15250001847743988, + "reward_std": 0.7683477997779846, + "rewards/reward_environment_execution/mean": 
-0.09750000387430191, + "rewards/reward_environment_execution/std": 0.2683747410774231, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 5 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 701.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 913.0, + "completions/max_terminated_length": 913.0, + "completions/mean_length": 701.0, + "completions/mean_terminated_length": 701.0, + "completions/min_length": 553.0, + "completions/min_terminated_length": 553.0, + "epoch": 0.06, + "frac_reward_zero_std": 1.0, + "grad_norm": 6.133589340606704e-05, + "kl": 6.346002919599414e-06, + "learning_rate": 2.5e-06, + "loss": 2.5384014179508085e-07, + "num_tokens": 34230.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": -0.5, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": -0.5, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.0, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 6 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 524.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 640.0, + "completions/max_terminated_length": 640.0, + "completions/mean_length": 524.0, + "completions/mean_terminated_length": 524.0, + "completions/min_length": 450.0, + 
"completions/min_terminated_length": 450.0, + "epoch": 0.07, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13970911502838135, + "kl": 6.830848860772676e-06, + "learning_rate": 3e-06, + "loss": 2.8312206268310547e-07, + "num_tokens": 39426.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 7 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 626.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 725.0, + "completions/max_terminated_length": 725.0, + "completions/mean_length": 626.0, + "completions/mean_terminated_length": 626.0, + "completions/min_length": 537.0, + "completions/min_terminated_length": 537.0, + "epoch": 0.08, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12764990329742432, + "kl": 5.7269965054729255e-06, + "learning_rate": 3.5e-06, + "loss": 2.1606683731079102e-07, + "num_tokens": 45026.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + 
"rewards/reward_os_mechanics/std": 0.0, + "step": 8 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 669.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 752.0, + "completions/max_terminated_length": 752.0, + "completions/mean_length": 669.5, + "completions/mean_terminated_length": 669.5, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.09, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1321861445903778, + "kl": 5.624260097647493e-06, + "learning_rate": 4.000000000000001e-06, + "loss": 2.086162567138672e-07, + "num_tokens": 50812.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 9 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 640.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 790.0, + "completions/max_terminated_length": 790.0, + "completions/mean_length": 640.25, + "completions/mean_terminated_length": 640.25, + "completions/min_length": 471.0, + "completions/min_terminated_length": 471.0, + "epoch": 0.1, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12317467480897903, + "kl": 6.860862526991696e-06, + "learning_rate": 4.5e-06, + "loss": 3.0547380447387695e-07, + "num_tokens": 56485.0, + "reward": 
0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 664.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 855.0, + "completions/max_terminated_length": 855.0, + "completions/mean_length": 664.75, + "completions/mean_terminated_length": 664.75, + "completions/min_length": 559.0, + "completions/min_terminated_length": 559.0, + "epoch": 0.11, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1155354306101799, + "kl": 6.481316631834488e-06, + "learning_rate": 5e-06, + "loss": 2.682209014892578e-07, + "num_tokens": 62252.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 11 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 699.5, + 
"completions/clipped_ratio": 0.0, + "completions/max_length": 880.0, + "completions/max_terminated_length": 880.0, + "completions/mean_length": 699.5, + "completions/mean_terminated_length": 699.5, + "completions/min_length": 539.0, + "completions/min_terminated_length": 539.0, + "epoch": 0.12, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11765824258327484, + "kl": 5.136041295372706e-06, + "learning_rate": 4.99847706754774e-06, + "loss": 1.862645149230957e-07, + "num_tokens": 68150.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 12 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 753.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 913.0, + "completions/max_terminated_length": 913.0, + "completions/mean_length": 753.5, + "completions/mean_terminated_length": 753.5, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.13, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1163109764456749, + "kl": 6.4769044456625124e-06, + "learning_rate": 4.993910125649561e-06, + "loss": 2.682209014892578e-07, + "num_tokens": 74436.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 
0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 13 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 691.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 870.0, + "completions/max_terminated_length": 870.0, + "completions/mean_length": 691.25, + "completions/mean_terminated_length": 691.25, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.14, + "frac_reward_zero_std": 1.0, + "grad_norm": 5.699428584193811e-05, + "kl": 4.6748977524657676e-06, + "learning_rate": 4.986304738420684e-06, + "loss": 1.869958907718683e-07, + "num_tokens": 80465.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 14 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 588.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 832.0, + "completions/max_terminated_length": 832.0, + "completions/mean_length": 588.0, + "completions/mean_terminated_length": 588.0, + "completions/min_length": 432.0, + "completions/min_terminated_length": 432.0, + "epoch": 0.15, 
+ "frac_reward_zero_std": 1.0, + "grad_norm": 0.00010395531717222184, + "kl": 9.099214707930514e-06, + "learning_rate": 4.975670171853926e-06, + "loss": 3.639685814960103e-07, + "num_tokens": 86089.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": -0.5, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": -0.5, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.0, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 15 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 607.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 673.0, + "completions/max_terminated_length": 673.0, + "completions/mean_length": 607.75, + "completions/mean_terminated_length": 607.75, + "completions/min_length": 517.0, + "completions/min_terminated_length": 517.0, + "epoch": 0.16, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1489061564207077, + "kl": 9.158140301224194e-06, + "learning_rate": 4.962019382530521e-06, + "loss": 3.2782554626464844e-07, + "num_tokens": 91620.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 16 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + 
"clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 657.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 737.0, + "completions/max_terminated_length": 737.0, + "completions/mean_length": 657.25, + "completions/mean_terminated_length": 657.25, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.17, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14814995229244232, + "kl": 1.14353706521797e-05, + "learning_rate": 4.9453690018345144e-06, + "loss": 4.470348358154297e-07, + "num_tokens": 97513.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 17 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 549.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 621.0, + "completions/max_terminated_length": 621.0, + "completions/mean_length": 549.25, + "completions/mean_terminated_length": 549.25, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.18, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13028942048549652, + "kl": 1.1230136237827537e-05, + "learning_rate": 4.925739315689991e-06, + "loss": 4.6193599700927734e-07, + "num_tokens": 102810.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + 
"rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 18 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 565.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 686.0, + "completions/max_terminated_length": 686.0, + "completions/mean_length": 565.5, + "completions/mean_terminated_length": 565.5, + "completions/min_length": 288.0, + "completions/min_terminated_length": 288.0, + "epoch": 0.19, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.2010871171951294, + "kl": 1.3259304523671744e-05, + "learning_rate": 4.903154239845798e-06, + "loss": 5.438923835754395e-07, + "num_tokens": 108344.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 19 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 692.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 777.0, + "completions/max_terminated_length": 777.0, + 
"completions/mean_length": 692.25, + "completions/mean_terminated_length": 692.25, + "completions/min_length": 633.0, + "completions/min_terminated_length": 633.0, + "epoch": 0.2, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12407222390174866, + "kl": 1.1413530501158675e-05, + "learning_rate": 4.8776412907378845e-06, + "loss": 4.6938657760620117e-07, + "num_tokens": 114393.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 20 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 633.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 702.0, + "completions/max_terminated_length": 702.0, + "completions/mean_length": 633.0, + "completions/mean_terminated_length": 633.0, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.21, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14354389905929565, + "kl": 1.5171975746852695e-05, + "learning_rate": 4.849231551964771e-06, + "loss": 6.407499313354492e-07, + "num_tokens": 120177.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + 
"rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 21 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 669.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 978.0, + "completions/max_terminated_length": 978.0, + "completions/mean_length": 669.0, + "completions/mean_terminated_length": 669.0, + "completions/min_length": 444.0, + "completions/min_terminated_length": 444.0, + "epoch": 0.22, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12197419255971909, + "kl": 1.5716058214820805e-05, + "learning_rate": 4.817959636416969e-06, + "loss": 6.183981895446777e-07, + "num_tokens": 126125.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 22 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 774.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1024.0, + "completions/max_terminated_length": 1024.0, + "completions/mean_length": 774.25, + "completions/mean_terminated_length": 774.25, + "completions/min_length": 484.0, + "completions/min_terminated_length": 484.0, + "epoch": 0.23, + 
"frac_reward_zero_std": 0.0, + "grad_norm": 0.1378677934408188, + "kl": 2.0351369357740623e-05, + "learning_rate": 4.783863644106502e-06, + "loss": 8.493661880493164e-07, + "num_tokens": 132326.0, + "reward": -0.21249999105930328, + "reward_std": 0.9097756743431091, + "rewards/reward_environment_execution/mean": -0.23750001192092896, + "rewards/reward_environment_execution/std": 0.30335623025894165, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.125, + "rewards/reward_investigation_quality/std": 0.15000000596046448, + "rewards/reward_os_mechanics/mean": 0.05000000074505806, + "rewards/reward_os_mechanics/std": 0.10000000149011612, + "step": 23 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 565.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 732.0, + "completions/max_terminated_length": 732.0, + "completions/mean_length": 565.75, + "completions/mean_terminated_length": 565.75, + "completions/min_length": 314.0, + "completions/min_terminated_length": 314.0, + "epoch": 0.24, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1561768352985382, + "kl": 2.7805408535641618e-05, + "learning_rate": 4.746985115747918e-06, + "loss": 1.1026859283447266e-06, + "num_tokens": 137685.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + 
"rewards/reward_os_mechanics/std": 0.0, + "step": 24 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 668.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 909.0, + "completions/max_terminated_length": 909.0, + "completions/mean_length": 668.75, + "completions/mean_terminated_length": 668.75, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.25, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13285094499588013, + "kl": 2.5312373509223107e-05, + "learning_rate": 4.707368982147318e-06, + "loss": 1.0132789611816406e-06, + "num_tokens": 143456.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 25 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 487.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 559.0, + "completions/max_terminated_length": 559.0, + "completions/mean_length": 487.25, + "completions/mean_terminated_length": 487.25, + "completions/min_length": 420.0, + "completions/min_terminated_length": 420.0, + "epoch": 0.26, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.22987650334835052, + "kl": 2.307988370375824e-05, + "learning_rate": 4.665063509461098e-06, + "loss": 9.5367431640625e-07, + "num_tokens": 148505.0, 
+ "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 26 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 667.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 782.0, + "completions/max_terminated_length": 782.0, + "completions/mean_length": 667.25, + "completions/mean_terminated_length": 667.25, + "completions/min_length": 533.0, + "completions/min_terminated_length": 533.0, + "epoch": 0.27, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12192773073911667, + "kl": 2.0764900455105817e-05, + "learning_rate": 4.620120240391065e-06, + "loss": 8.642673492431641e-07, + "num_tokens": 154454.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 27 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + 
"completion_length": 722.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 896.0, + "completions/max_terminated_length": 896.0, + "completions/mean_length": 722.0, + "completions/mean_terminated_length": 722.0, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.28, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11616191267967224, + "kl": 2.81208617707307e-05, + "learning_rate": 4.572593931387604e-06, + "loss": 1.1026859283447266e-06, + "num_tokens": 160446.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 28 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 686.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 743.0, + "completions/max_terminated_length": 743.0, + "completions/mean_length": 686.0, + "completions/mean_terminated_length": 686.0, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.29, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13982541859149933, + "kl": 2.6462233336133067e-05, + "learning_rate": 4.522542485937369e-06, + "loss": 1.043081283569336e-06, + "num_tokens": 166286.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + 
"rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 29 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 673.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 847.0, + "completions/max_terminated_length": 847.0, + "completions/mean_length": 673.75, + "completions/mean_terminated_length": 673.75, + "completions/min_length": 536.0, + "completions/min_terminated_length": 536.0, + "epoch": 0.3, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17358987033367157, + "kl": 4.3880153043573955e-05, + "learning_rate": 4.470026884016805e-06, + "loss": 1.9818544387817383e-06, + "num_tokens": 172089.0, + "reward": 0.6325000524520874, + "reward_std": 0.14221462607383728, + "rewards/reward_environment_execution/mean": 0.057500001043081284, + "rewards/reward_environment_execution/std": 0.056789085268974304, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.25, + "rewards/reward_investigation_quality/std": 0.057735033333301544, + "rewards/reward_os_mechanics/mean": 0.125, + "rewards/reward_os_mechanics/std": 0.15000000596046448, + "step": 30 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 648.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 858.0, + "completions/max_terminated_length": 858.0, + "completions/mean_length": 648.0, + "completions/mean_terminated_length": 648.0, + 
"completions/min_length": 461.0, + "completions/min_terminated_length": 461.0, + "epoch": 0.31, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1460961103439331, + "kl": 3.440224281803239e-05, + "learning_rate": 4.415111107797445e-06, + "loss": 1.3560056686401367e-06, + "num_tokens": 177773.0, + "reward": -0.2800000011920929, + "reward_std": 0.8353841304779053, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.125, + "rewards/reward_investigation_quality/std": 0.15000000596046448, + "rewards/reward_os_mechanics/mean": -0.02500000037252903, + "rewards/reward_os_mechanics/std": 0.05000000074505806, + "step": 31 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 832.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1227.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 832.5, + "completions/mean_terminated_length": 832.5, + "completions/min_length": 628.0, + "completions/min_terminated_length": 628.0, + "epoch": 0.32, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0001798434095690027, + "kl": 3.118666154477978e-05, + "learning_rate": 4.357862063693486e-06, + "loss": 1.2474664572437177e-06, + "num_tokens": 184203.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + 
"rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 32 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 519.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 580.0, + "completions/max_terminated_length": 580.0, + "completions/mean_length": 519.0, + "completions/mean_terminated_length": 519.0, + "completions/min_length": 435.0, + "completions/min_terminated_length": 435.0, + "epoch": 0.33, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0003187077818438411, + "kl": 5.177023012947757e-05, + "learning_rate": 4.2983495008466285e-06, + "loss": 2.0708091597043676e-06, + "num_tokens": 189387.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 33 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 670.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 767.0, + "completions/max_terminated_length": 767.0, + "completions/mean_length": 670.5, + "completions/mean_terminated_length": 670.5, + "completions/min_length": 585.0, + "completions/min_terminated_length": 585.0, + "epoch": 0.34, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12342657893896103, + "kl": 3.6297568385634804e-05, + "learning_rate": 4.236645926147493e-06, + "loss": 1.4156103134155273e-06, + "num_tokens": 195333.0, + "reward": 
-0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 34 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 464.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 645.0, + "completions/max_terminated_length": 645.0, + "completions/mean_length": 464.5, + "completions/mean_terminated_length": 464.5, + "completions/min_length": 195.0, + "completions/min_terminated_length": 195.0, + "epoch": 0.35, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14799511432647705, + "kl": 9.590507397660986e-05, + "learning_rate": 4.172826515897146e-06, + "loss": 3.822147846221924e-06, + "num_tokens": 200295.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 35 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 539.75, 
+ "completions/clipped_ratio": 0.0, + "completions/max_length": 667.0, + "completions/max_terminated_length": 667.0, + "completions/mean_length": 539.75, + "completions/mean_terminated_length": 539.75, + "completions/min_length": 403.0, + "completions/min_terminated_length": 403.0, + "epoch": 0.36, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15325292944908142, + "kl": 9.70982619037386e-05, + "learning_rate": 4.106969024216348e-06, + "loss": 3.919005393981934e-06, + "num_tokens": 205554.0, + "reward": -0.23249998688697815, + "reward_std": 0.8862420320510864, + "rewards/reward_environment_execution/mean": -0.23250000178813934, + "rewards/reward_environment_execution/std": 0.3089093565940857, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 36 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 544.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 787.0, + "completions/max_terminated_length": 787.0, + "completions/mean_length": 544.5, + "completions/mean_terminated_length": 544.5, + "completions/min_length": 333.0, + "completions/min_terminated_length": 333.0, + "epoch": 0.37, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15045017004013062, + "kl": 7.959679260238772e-05, + "learning_rate": 4.039153688314146e-06, + "loss": 3.159046173095703e-06, + "num_tokens": 210828.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 
-0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 37 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 638.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 695.0, + "completions/max_terminated_length": 695.0, + "completions/mean_length": 638.75, + "completions/mean_terminated_length": 638.75, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.38, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.000358834135113284, + "kl": 6.0015446706529474e-05, + "learning_rate": 3.969463130731183e-06, + "loss": 2.4006176317925565e-06, + "num_tokens": 216647.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 38 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 555.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 891.0, + "completions/max_terminated_length": 891.0, + "completions/mean_length": 555.75, + "completions/mean_terminated_length": 555.75, + "completions/min_length": 389.0, + "completions/min_terminated_length": 389.0, + "epoch": 
0.39, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.20931142568588257, + "kl": 6.502814630948706e-05, + "learning_rate": 3.897982258676867e-06, + "loss": 2.6226043701171875e-06, + "num_tokens": 221986.0, + "reward": 0.5375000238418579, + "reward_std": 0.004999995231628418, + "rewards/reward_environment_execution/mean": 0.03750000149011612, + "rewards/reward_environment_execution/std": 0.004999999888241291, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 39 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 638.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 786.0, + "completions/max_terminated_length": 786.0, + "completions/mean_length": 638.5, + "completions/mean_terminated_length": 638.5, + "completions/min_length": 469.0, + "completions/min_terminated_length": 469.0, + "epoch": 0.4, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1422736942768097, + "kl": 0.0001064265761669958, + "learning_rate": 3.824798160583012e-06, + "loss": 4.246830940246582e-06, + "num_tokens": 227812.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 40 + }, + 
{ + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 729.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 839.0, + "completions/max_terminated_length": 839.0, + "completions/mean_length": 729.5, + "completions/mean_terminated_length": 729.5, + "completions/min_length": 608.0, + "completions/min_terminated_length": 608.0, + "epoch": 0.41, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11686036735773087, + "kl": 8.182951478374889e-05, + "learning_rate": 3.7500000000000005e-06, + "loss": 3.3080577850341797e-06, + "num_tokens": 233834.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 41 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 582.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 772.0, + "completions/max_terminated_length": 772.0, + "completions/mean_length": 582.0, + "completions/mean_terminated_length": 582.0, + "completions/min_length": 430.0, + "completions/min_terminated_length": 430.0, + "epoch": 0.42, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15176120400428772, + "kl": 0.00013200526700529736, + "learning_rate": 3.6736789069647273e-06, + "loss": 5.21540641784668e-06, + "num_tokens": 239438.0, + "reward": -0.22999998927116394, + "reward_std": 
0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 42 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 797.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 949.0, + "completions/max_terminated_length": 949.0, + "completions/mean_length": 797.75, + "completions/mean_terminated_length": 797.75, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.43, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.10845938324928284, + "kl": 5.39204447704833e-05, + "learning_rate": 3.595927866972694e-06, + "loss": 2.115964889526367e-06, + "num_tokens": 245737.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 43 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 650.25, + "completions/clipped_ratio": 0.0, + 
"completions/max_length": 885.0, + "completions/max_terminated_length": 885.0, + "completions/mean_length": 650.25, + "completions/mean_terminated_length": 650.25, + "completions/min_length": 508.0, + "completions/min_terminated_length": 508.0, + "epoch": 0.44, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.16251257061958313, + "kl": 0.0001057987265085103, + "learning_rate": 3.516841607689501e-06, + "loss": 4.276633262634277e-06, + "num_tokens": 251618.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 44 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 692.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 719.0, + "completions/max_terminated_length": 719.0, + "completions/mean_length": 692.75, + "completions/mean_terminated_length": 692.75, + "completions/min_length": 666.0, + "completions/min_terminated_length": 666.0, + "epoch": 0.45, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15349064767360687, + "kl": 7.098338755895384e-05, + "learning_rate": 3.436516483539781e-06, + "loss": 2.853572368621826e-06, + "num_tokens": 257493.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + 
"rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 45 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 576.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 678.0, + "completions/max_terminated_length": 678.0, + "completions/mean_length": 576.5, + "completions/mean_terminated_length": 576.5, + "completions/min_length": 451.0, + "completions/min_terminated_length": 451.0, + "epoch": 0.46, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.16658417880535126, + "kl": 0.00024796422985673416, + "learning_rate": 3.3550503583141726e-06, + "loss": 9.894371032714844e-06, + "num_tokens": 262907.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 46 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 679.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1068.0, + "completions/max_terminated_length": 1068.0, + "completions/mean_length": 679.75, + "completions/mean_terminated_length": 679.75, + "completions/min_length": 452.0, + 
"completions/min_terminated_length": 452.0, + "epoch": 0.47, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17304089665412903, + "kl": 0.00017170603496197145, + "learning_rate": 3.272542485937369e-06, + "loss": 6.839632987976074e-06, + "num_tokens": 268906.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 47 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 637.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 698.0, + "completions/max_terminated_length": 698.0, + "completions/mean_length": 637.25, + "completions/mean_terminated_length": 637.25, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.48, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00041183308348990977, + "kl": 0.00011487708798085805, + "learning_rate": 3.189093389542498e-06, + "loss": 4.5950837375130504e-06, + "num_tokens": 274563.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + 
"rewards/reward_os_mechanics/std": 0.0, + "step": 48 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 802.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1189.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 802.25, + "completions/mean_terminated_length": 802.25, + "completions/min_length": 501.0, + "completions/min_terminated_length": 501.0, + "epoch": 0.49, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17298302054405212, + "kl": 0.00021534036750381347, + "learning_rate": 3.1048047389991693e-06, + "loss": 8.627772331237793e-06, + "num_tokens": 281048.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 49 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 676.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 725.0, + "completions/max_terminated_length": 725.0, + "completions/mean_length": 676.0, + "completions/mean_terminated_length": 676.0, + "completions/min_length": 631.0, + "completions/min_terminated_length": 631.0, + "epoch": 0.5, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13108237087726593, + "kl": 0.00016818426684039878, + "learning_rate": 3.019779227044398e-06, + "loss": 6.7427754402160645e-06, + "num_tokens": 
287032.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 50 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 287032, + "num_train_epochs": 1, + "save_steps": 25, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a152a78f906cd195d8d8231bc4b418c49eedd15 --- /dev/null +++ b/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9107f9f9e124fcdfbf6ef3ddbf42619d99c323466de7a60f15005895e3f3aa4b +size 6673 diff --git a/checkpoint-75/README.md b/checkpoint-75/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c1faba2efaacd24a9c0a637ded9575527657860d --- /dev/null +++ b/checkpoint-75/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit +- grpo +- lora +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed 
by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-75/adapter_config.json b/checkpoint-75/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0578d0149020358a61e1ca953a43cbd73121376f --- /dev/null +++ b/checkpoint-75/adapter_config.json @@ -0,0 +1,50 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "LlamaForCausalLM", + "parent_library": "transformers.models.llama.modeling_llama", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", 
+ "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "v_proj", + "up_proj", + "k_proj", + "down_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-75/adapter_model.safetensors b/checkpoint-75/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e43dc58043264e1f9dce67e0cd86094dca7e4909 --- /dev/null +++ b/checkpoint-75/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ee531a7eb61e6efa4a4c950175ec164766d4571f90ea601e521850cc61e12e +size 167832240 diff --git a/checkpoint-75/chat_template.jinja b/checkpoint-75/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-75/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. 
#} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-75/optimizer.pt b/checkpoint-75/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..cbaa8b10850cc220b7f348acf9ad44bda00dee32 --- /dev/null +++ b/checkpoint-75/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb8e71185cfb367da984514bf7fcfe3c7eb40d66021284d50368a4790799f77 +size 85728229 diff --git a/checkpoint-75/rng_state.pth b/checkpoint-75/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4471cd9c14fa64db7c42519002b4ab4374fb898c --- /dev/null +++ b/checkpoint-75/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94864f1b4a96a0c0e2653e4aa5be71d4743ca942a57a2160a2663f85c0195af7 +size 14645 diff --git a/checkpoint-75/scaler.pt b/checkpoint-75/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bd874a7e3027a27d116e678fe6dec2d25381c36 --- /dev/null +++ b/checkpoint-75/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eba3a981276f695609c08d8e087781358eb12e9e7074586d4f1cf5d82aadec2 +size 1383 diff --git a/checkpoint-75/scheduler.pt b/checkpoint-75/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..273a1aa0a13e0c313a96592186e76dd0efddb895 --- /dev/null +++ b/checkpoint-75/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbe7c85231f1632cf34603b1e86988fb4dd4a32b125a3c9a92a74762d73ed92d +size 1465 diff --git a/checkpoint-75/tokenizer.json b/checkpoint-75/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5b9e375b2b4e8c4210d9d8a8a8d0642d1f715076 --- /dev/null +++ b/checkpoint-75/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65c6c5f9764771aa485e6a1f5e63d7d9af8477fe0777148c17476ecb2e09a05 +size 17210099 diff --git a/checkpoint-75/tokenizer_config.json b/checkpoint-75/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a55a3083a0efb3c15b58aa6c3517ac5dfc6d1ca --- /dev/null +++ 
b/checkpoint-75/tokenizer_config.json @@ -0,0 +1,2068 @@ +{ + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "from_slow": true, + "is_local": false, + "legacy": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "TokenizersBackend", + "unk_token": null, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128033": { + "content": 
"<|reserved_special_token_25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + 
"128045": { + "content": "<|reserved_special_token_37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128116": { + "content": 
"<|reserved_special_token_108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128128": { + "content": "<|reserved_special_token_120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128175": { + "content": 
"<|reserved_special_token_167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128187": { + "content": "<|reserved_special_token_179|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128234": { + "content": 
"<|reserved_special_token_226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128246": { + "content": "<|reserved_special_token_238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + } +} diff --git a/checkpoint-75/trainer_state.json b/checkpoint-75/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f5fcfe945659ece396f8ad14cbd08303b08e75e3 --- /dev/null +++ b/checkpoint-75/trainer_state.json @@ -0,0 +1,2509 @@ +{ + "best_global_step": null, + 
"best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.75, + "eval_steps": 500, + "global_step": 75, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 659.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 821.0, + "completions/max_terminated_length": 821.0, + "completions/mean_length": 659.0, + "completions/mean_terminated_length": 659.0, + "completions/min_length": 516.0, + "completions/min_terminated_length": 516.0, + "epoch": 0.01, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13492469489574432, + "kl": 6.197717993927654e-06, + "learning_rate": 0.0, + "loss": 2.8312206268310547e-07, + "num_tokens": 5736.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 1 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 543.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 693.0, + "completions/max_terminated_length": 693.0, + "completions/mean_length": 543.75, + "completions/mean_terminated_length": 543.75, + "completions/min_length": 426.0, + "completions/min_terminated_length": 426.0, + "epoch": 0.02, + "frac_reward_zero_std": 0.0, + "grad_norm": 
0.1295754313468933, + "kl": 7.463787596861948e-06, + "learning_rate": 5.000000000000001e-07, + "loss": 3.725290298461914e-07, + "num_tokens": 11019.0, + "reward": 0.15250001847743988, + "reward_std": 0.7683477997779846, + "rewards/reward_environment_execution/mean": -0.09749999642372131, + "rewards/reward_environment_execution/std": 0.2683747410774231, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 2 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 572.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 677.0, + "completions/max_terminated_length": 677.0, + "completions/mean_length": 572.75, + "completions/mean_terminated_length": 572.75, + "completions/min_length": 490.0, + "completions/min_terminated_length": 490.0, + "epoch": 0.03, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1402619630098343, + "kl": 5.991519174131099e-06, + "learning_rate": 1.0000000000000002e-06, + "loss": 2.2351741790771484e-07, + "num_tokens": 16418.0, + "reward": 0.5375000238418579, + "reward_std": 0.004999995231628418, + "rewards/reward_environment_execution/mean": 0.03750000149011612, + "rewards/reward_environment_execution/std": 0.004999999888241291, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 3 + }, + { + "clip_ratio/high_max": 0.0, + 
"clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 705.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 783.0, + "completions/max_terminated_length": 783.0, + "completions/mean_length": 705.0, + "completions/mean_terminated_length": 705.0, + "completions/min_length": 634.0, + "completions/min_terminated_length": 634.0, + "epoch": 0.04, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13879841566085815, + "kl": 6.360480483635911e-06, + "learning_rate": 1.5e-06, + "loss": 2.980232238769531e-07, + "num_tokens": 22514.0, + "reward": 0.24392502009868622, + "reward_std": 0.8475051522254944, + "rewards/reward_environment_execution/mean": 0.04392500966787338, + "rewards/reward_environment_execution/std": 0.4515362083911896, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.17500001192092896, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 4 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 677.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 785.0, + "completions/max_terminated_length": 785.0, + "completions/mean_length": 677.0, + "completions/mean_terminated_length": 677.0, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.05, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.10656019300222397, + "kl": 4.833805178350303e-06, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.2351741790771484e-07, + "num_tokens": 28326.0, + "reward": 0.15250001847743988, + "reward_std": 0.7683477997779846, + "rewards/reward_environment_execution/mean": 
-0.09750000387430191, + "rewards/reward_environment_execution/std": 0.2683747410774231, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 5 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 701.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 913.0, + "completions/max_terminated_length": 913.0, + "completions/mean_length": 701.0, + "completions/mean_terminated_length": 701.0, + "completions/min_length": 553.0, + "completions/min_terminated_length": 553.0, + "epoch": 0.06, + "frac_reward_zero_std": 1.0, + "grad_norm": 6.133589340606704e-05, + "kl": 6.346002919599414e-06, + "learning_rate": 2.5e-06, + "loss": 2.5384014179508085e-07, + "num_tokens": 34230.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": -0.5, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": -0.5, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.0, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 6 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 524.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 640.0, + "completions/max_terminated_length": 640.0, + "completions/mean_length": 524.0, + "completions/mean_terminated_length": 524.0, + "completions/min_length": 450.0, + 
"completions/min_terminated_length": 450.0, + "epoch": 0.07, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13970911502838135, + "kl": 6.830848860772676e-06, + "learning_rate": 3e-06, + "loss": 2.8312206268310547e-07, + "num_tokens": 39426.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 7 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 626.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 725.0, + "completions/max_terminated_length": 725.0, + "completions/mean_length": 626.0, + "completions/mean_terminated_length": 626.0, + "completions/min_length": 537.0, + "completions/min_terminated_length": 537.0, + "epoch": 0.08, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12764990329742432, + "kl": 5.7269965054729255e-06, + "learning_rate": 3.5e-06, + "loss": 2.1606683731079102e-07, + "num_tokens": 45026.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + 
"rewards/reward_os_mechanics/std": 0.0, + "step": 8 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 669.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 752.0, + "completions/max_terminated_length": 752.0, + "completions/mean_length": 669.5, + "completions/mean_terminated_length": 669.5, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.09, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1321861445903778, + "kl": 5.624260097647493e-06, + "learning_rate": 4.000000000000001e-06, + "loss": 2.086162567138672e-07, + "num_tokens": 50812.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 9 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 640.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 790.0, + "completions/max_terminated_length": 790.0, + "completions/mean_length": 640.25, + "completions/mean_terminated_length": 640.25, + "completions/min_length": 471.0, + "completions/min_terminated_length": 471.0, + "epoch": 0.1, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12317467480897903, + "kl": 6.860862526991696e-06, + "learning_rate": 4.5e-06, + "loss": 3.0547380447387695e-07, + "num_tokens": 56485.0, + "reward": 
0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 664.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 855.0, + "completions/max_terminated_length": 855.0, + "completions/mean_length": 664.75, + "completions/mean_terminated_length": 664.75, + "completions/min_length": 559.0, + "completions/min_terminated_length": 559.0, + "epoch": 0.11, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1155354306101799, + "kl": 6.481316631834488e-06, + "learning_rate": 5e-06, + "loss": 2.682209014892578e-07, + "num_tokens": 62252.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 11 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 699.5, + 
"completions/clipped_ratio": 0.0, + "completions/max_length": 880.0, + "completions/max_terminated_length": 880.0, + "completions/mean_length": 699.5, + "completions/mean_terminated_length": 699.5, + "completions/min_length": 539.0, + "completions/min_terminated_length": 539.0, + "epoch": 0.12, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11765824258327484, + "kl": 5.136041295372706e-06, + "learning_rate": 4.99847706754774e-06, + "loss": 1.862645149230957e-07, + "num_tokens": 68150.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 12 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 753.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 913.0, + "completions/max_terminated_length": 913.0, + "completions/mean_length": 753.5, + "completions/mean_terminated_length": 753.5, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.13, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1163109764456749, + "kl": 6.4769044456625124e-06, + "learning_rate": 4.993910125649561e-06, + "loss": 2.682209014892578e-07, + "num_tokens": 74436.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 
0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 13 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 691.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 870.0, + "completions/max_terminated_length": 870.0, + "completions/mean_length": 691.25, + "completions/mean_terminated_length": 691.25, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.14, + "frac_reward_zero_std": 1.0, + "grad_norm": 5.699428584193811e-05, + "kl": 4.6748977524657676e-06, + "learning_rate": 4.986304738420684e-06, + "loss": 1.869958907718683e-07, + "num_tokens": 80465.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 14 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 588.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 832.0, + "completions/max_terminated_length": 832.0, + "completions/mean_length": 588.0, + "completions/mean_terminated_length": 588.0, + "completions/min_length": 432.0, + "completions/min_terminated_length": 432.0, + "epoch": 0.15, 
+ "frac_reward_zero_std": 1.0, + "grad_norm": 0.00010395531717222184, + "kl": 9.099214707930514e-06, + "learning_rate": 4.975670171853926e-06, + "loss": 3.639685814960103e-07, + "num_tokens": 86089.0, + "reward": -1.0, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": -0.5, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": -0.5, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.0, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 15 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 607.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 673.0, + "completions/max_terminated_length": 673.0, + "completions/mean_length": 607.75, + "completions/mean_terminated_length": 607.75, + "completions/min_length": 517.0, + "completions/min_terminated_length": 517.0, + "epoch": 0.16, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1489061564207077, + "kl": 9.158140301224194e-06, + "learning_rate": 4.962019382530521e-06, + "loss": 3.2782554626464844e-07, + "num_tokens": 91620.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 16 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + 
"clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 657.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 737.0, + "completions/max_terminated_length": 737.0, + "completions/mean_length": 657.25, + "completions/mean_terminated_length": 657.25, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.17, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14814995229244232, + "kl": 1.14353706521797e-05, + "learning_rate": 4.9453690018345144e-06, + "loss": 4.470348358154297e-07, + "num_tokens": 97513.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 17 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 549.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 621.0, + "completions/max_terminated_length": 621.0, + "completions/mean_length": 549.25, + "completions/mean_terminated_length": 549.25, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.18, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13028942048549652, + "kl": 1.1230136237827537e-05, + "learning_rate": 4.925739315689991e-06, + "loss": 4.6193599700927734e-07, + "num_tokens": 102810.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + 
"rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 18 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 565.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 686.0, + "completions/max_terminated_length": 686.0, + "completions/mean_length": 565.5, + "completions/mean_terminated_length": 565.5, + "completions/min_length": 288.0, + "completions/min_terminated_length": 288.0, + "epoch": 0.19, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.2010871171951294, + "kl": 1.3259304523671744e-05, + "learning_rate": 4.903154239845798e-06, + "loss": 5.438923835754395e-07, + "num_tokens": 108344.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 19 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 692.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 777.0, + "completions/max_terminated_length": 777.0, + 
"completions/mean_length": 692.25, + "completions/mean_terminated_length": 692.25, + "completions/min_length": 633.0, + "completions/min_terminated_length": 633.0, + "epoch": 0.2, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12407222390174866, + "kl": 1.1413530501158675e-05, + "learning_rate": 4.8776412907378845e-06, + "loss": 4.6938657760620117e-07, + "num_tokens": 114393.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 20 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 633.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 702.0, + "completions/max_terminated_length": 702.0, + "completions/mean_length": 633.0, + "completions/mean_terminated_length": 633.0, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.21, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14354389905929565, + "kl": 1.5171975746852695e-05, + "learning_rate": 4.849231551964771e-06, + "loss": 6.407499313354492e-07, + "num_tokens": 120177.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + 
"rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 21 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 669.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 978.0, + "completions/max_terminated_length": 978.0, + "completions/mean_length": 669.0, + "completions/mean_terminated_length": 669.0, + "completions/min_length": 444.0, + "completions/min_terminated_length": 444.0, + "epoch": 0.22, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12197419255971909, + "kl": 1.5716058214820805e-05, + "learning_rate": 4.817959636416969e-06, + "loss": 6.183981895446777e-07, + "num_tokens": 126125.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 22 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 774.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1024.0, + "completions/max_terminated_length": 1024.0, + "completions/mean_length": 774.25, + "completions/mean_terminated_length": 774.25, + "completions/min_length": 484.0, + "completions/min_terminated_length": 484.0, + "epoch": 0.23, + 
"frac_reward_zero_std": 0.0, + "grad_norm": 0.1378677934408188, + "kl": 2.0351369357740623e-05, + "learning_rate": 4.783863644106502e-06, + "loss": 8.493661880493164e-07, + "num_tokens": 132326.0, + "reward": -0.21249999105930328, + "reward_std": 0.9097756743431091, + "rewards/reward_environment_execution/mean": -0.23750001192092896, + "rewards/reward_environment_execution/std": 0.30335623025894165, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.125, + "rewards/reward_investigation_quality/std": 0.15000000596046448, + "rewards/reward_os_mechanics/mean": 0.05000000074505806, + "rewards/reward_os_mechanics/std": 0.10000000149011612, + "step": 23 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 565.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 732.0, + "completions/max_terminated_length": 732.0, + "completions/mean_length": 565.75, + "completions/mean_terminated_length": 565.75, + "completions/min_length": 314.0, + "completions/min_terminated_length": 314.0, + "epoch": 0.24, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1561768352985382, + "kl": 2.7805408535641618e-05, + "learning_rate": 4.746985115747918e-06, + "loss": 1.1026859283447266e-06, + "num_tokens": 137685.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + 
"rewards/reward_os_mechanics/std": 0.0, + "step": 24 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 668.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 909.0, + "completions/max_terminated_length": 909.0, + "completions/mean_length": 668.75, + "completions/mean_terminated_length": 668.75, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.25, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13285094499588013, + "kl": 2.5312373509223107e-05, + "learning_rate": 4.707368982147318e-06, + "loss": 1.0132789611816406e-06, + "num_tokens": 143456.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 25 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 487.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 559.0, + "completions/max_terminated_length": 559.0, + "completions/mean_length": 487.25, + "completions/mean_terminated_length": 487.25, + "completions/min_length": 420.0, + "completions/min_terminated_length": 420.0, + "epoch": 0.26, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.22987650334835052, + "kl": 2.307988370375824e-05, + "learning_rate": 4.665063509461098e-06, + "loss": 9.5367431640625e-07, + "num_tokens": 148505.0, 
+ "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 26 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 667.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 782.0, + "completions/max_terminated_length": 782.0, + "completions/mean_length": 667.25, + "completions/mean_terminated_length": 667.25, + "completions/min_length": 533.0, + "completions/min_terminated_length": 533.0, + "epoch": 0.27, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12192773073911667, + "kl": 2.0764900455105817e-05, + "learning_rate": 4.620120240391065e-06, + "loss": 8.642673492431641e-07, + "num_tokens": 154454.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 27 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + 
"completion_length": 722.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 896.0, + "completions/max_terminated_length": 896.0, + "completions/mean_length": 722.0, + "completions/mean_terminated_length": 722.0, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.28, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11616191267967224, + "kl": 2.81208617707307e-05, + "learning_rate": 4.572593931387604e-06, + "loss": 1.1026859283447266e-06, + "num_tokens": 160446.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 28 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 686.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 743.0, + "completions/max_terminated_length": 743.0, + "completions/mean_length": 686.0, + "completions/mean_terminated_length": 686.0, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.29, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13982541859149933, + "kl": 2.6462233336133067e-05, + "learning_rate": 4.522542485937369e-06, + "loss": 1.043081283569336e-06, + "num_tokens": 166286.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + 
"rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 29 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 673.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 847.0, + "completions/max_terminated_length": 847.0, + "completions/mean_length": 673.75, + "completions/mean_terminated_length": 673.75, + "completions/min_length": 536.0, + "completions/min_terminated_length": 536.0, + "epoch": 0.3, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17358987033367157, + "kl": 4.3880153043573955e-05, + "learning_rate": 4.470026884016805e-06, + "loss": 1.9818544387817383e-06, + "num_tokens": 172089.0, + "reward": 0.6325000524520874, + "reward_std": 0.14221462607383728, + "rewards/reward_environment_execution/mean": 0.057500001043081284, + "rewards/reward_environment_execution/std": 0.056789085268974304, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.25, + "rewards/reward_investigation_quality/std": 0.057735033333301544, + "rewards/reward_os_mechanics/mean": 0.125, + "rewards/reward_os_mechanics/std": 0.15000000596046448, + "step": 30 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 648.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 858.0, + "completions/max_terminated_length": 858.0, + "completions/mean_length": 648.0, + "completions/mean_terminated_length": 648.0, + 
"completions/min_length": 461.0, + "completions/min_terminated_length": 461.0, + "epoch": 0.31, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1460961103439331, + "kl": 3.440224281803239e-05, + "learning_rate": 4.415111107797445e-06, + "loss": 1.3560056686401367e-06, + "num_tokens": 177773.0, + "reward": -0.2800000011920929, + "reward_std": 0.8353841304779053, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.125, + "rewards/reward_investigation_quality/std": 0.15000000596046448, + "rewards/reward_os_mechanics/mean": -0.02500000037252903, + "rewards/reward_os_mechanics/std": 0.05000000074505806, + "step": 31 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 832.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1227.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 832.5, + "completions/mean_terminated_length": 832.5, + "completions/min_length": 628.0, + "completions/min_terminated_length": 628.0, + "epoch": 0.32, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0001798434095690027, + "kl": 3.118666154477978e-05, + "learning_rate": 4.357862063693486e-06, + "loss": 1.2474664572437177e-06, + "num_tokens": 184203.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + 
"rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 32 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 519.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 580.0, + "completions/max_terminated_length": 580.0, + "completions/mean_length": 519.0, + "completions/mean_terminated_length": 519.0, + "completions/min_length": 435.0, + "completions/min_terminated_length": 435.0, + "epoch": 0.33, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0003187077818438411, + "kl": 5.177023012947757e-05, + "learning_rate": 4.2983495008466285e-06, + "loss": 2.0708091597043676e-06, + "num_tokens": 189387.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 33 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 670.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 767.0, + "completions/max_terminated_length": 767.0, + "completions/mean_length": 670.5, + "completions/mean_terminated_length": 670.5, + "completions/min_length": 585.0, + "completions/min_terminated_length": 585.0, + "epoch": 0.34, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12342657893896103, + "kl": 3.6297568385634804e-05, + "learning_rate": 4.236645926147493e-06, + "loss": 1.4156103134155273e-06, + "num_tokens": 195333.0, + "reward": 
-0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 34 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 464.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 645.0, + "completions/max_terminated_length": 645.0, + "completions/mean_length": 464.5, + "completions/mean_terminated_length": 464.5, + "completions/min_length": 195.0, + "completions/min_terminated_length": 195.0, + "epoch": 0.35, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14799511432647705, + "kl": 9.590507397660986e-05, + "learning_rate": 4.172826515897146e-06, + "loss": 3.822147846221924e-06, + "num_tokens": 200295.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 35 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 539.75, 
+ "completions/clipped_ratio": 0.0, + "completions/max_length": 667.0, + "completions/max_terminated_length": 667.0, + "completions/mean_length": 539.75, + "completions/mean_terminated_length": 539.75, + "completions/min_length": 403.0, + "completions/min_terminated_length": 403.0, + "epoch": 0.36, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15325292944908142, + "kl": 9.70982619037386e-05, + "learning_rate": 4.106969024216348e-06, + "loss": 3.919005393981934e-06, + "num_tokens": 205554.0, + "reward": -0.23249998688697815, + "reward_std": 0.8862420320510864, + "rewards/reward_environment_execution/mean": -0.23250000178813934, + "rewards/reward_environment_execution/std": 0.3089093565940857, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 36 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 544.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 787.0, + "completions/max_terminated_length": 787.0, + "completions/mean_length": 544.5, + "completions/mean_terminated_length": 544.5, + "completions/min_length": 333.0, + "completions/min_terminated_length": 333.0, + "epoch": 0.37, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15045017004013062, + "kl": 7.959679260238772e-05, + "learning_rate": 4.039153688314146e-06, + "loss": 3.159046173095703e-06, + "num_tokens": 210828.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 
-0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 37 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 638.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 695.0, + "completions/max_terminated_length": 695.0, + "completions/mean_length": 638.75, + "completions/mean_terminated_length": 638.75, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.38, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.000358834135113284, + "kl": 6.0015446706529474e-05, + "learning_rate": 3.969463130731183e-06, + "loss": 2.4006176317925565e-06, + "num_tokens": 216647.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 38 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 555.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 891.0, + "completions/max_terminated_length": 891.0, + "completions/mean_length": 555.75, + "completions/mean_terminated_length": 555.75, + "completions/min_length": 389.0, + "completions/min_terminated_length": 389.0, + "epoch": 
0.39, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.20931142568588257, + "kl": 6.502814630948706e-05, + "learning_rate": 3.897982258676867e-06, + "loss": 2.6226043701171875e-06, + "num_tokens": 221986.0, + "reward": 0.5375000238418579, + "reward_std": 0.004999995231628418, + "rewards/reward_environment_execution/mean": 0.03750000149011612, + "rewards/reward_environment_execution/std": 0.004999999888241291, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 39 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 638.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 786.0, + "completions/max_terminated_length": 786.0, + "completions/mean_length": 638.5, + "completions/mean_terminated_length": 638.5, + "completions/min_length": 469.0, + "completions/min_terminated_length": 469.0, + "epoch": 0.4, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1422736942768097, + "kl": 0.0001064265761669958, + "learning_rate": 3.824798160583012e-06, + "loss": 4.246830940246582e-06, + "num_tokens": 227812.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 40 + }, + 
{ + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 729.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 839.0, + "completions/max_terminated_length": 839.0, + "completions/mean_length": 729.5, + "completions/mean_terminated_length": 729.5, + "completions/min_length": 608.0, + "completions/min_terminated_length": 608.0, + "epoch": 0.41, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11686036735773087, + "kl": 8.182951478374889e-05, + "learning_rate": 3.7500000000000005e-06, + "loss": 3.3080577850341797e-06, + "num_tokens": 233834.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 41 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 582.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 772.0, + "completions/max_terminated_length": 772.0, + "completions/mean_length": 582.0, + "completions/mean_terminated_length": 582.0, + "completions/min_length": 430.0, + "completions/min_terminated_length": 430.0, + "epoch": 0.42, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15176120400428772, + "kl": 0.00013200526700529736, + "learning_rate": 3.6736789069647273e-06, + "loss": 5.21540641784668e-06, + "num_tokens": 239438.0, + "reward": -0.22999998927116394, + "reward_std": 
0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 42 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 797.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 949.0, + "completions/max_terminated_length": 949.0, + "completions/mean_length": 797.75, + "completions/mean_terminated_length": 797.75, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.43, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.10845938324928284, + "kl": 5.39204447704833e-05, + "learning_rate": 3.595927866972694e-06, + "loss": 2.115964889526367e-06, + "num_tokens": 245737.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 43 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 650.25, + "completions/clipped_ratio": 0.0, + 
"completions/max_length": 885.0, + "completions/max_terminated_length": 885.0, + "completions/mean_length": 650.25, + "completions/mean_terminated_length": 650.25, + "completions/min_length": 508.0, + "completions/min_terminated_length": 508.0, + "epoch": 0.44, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.16251257061958313, + "kl": 0.0001057987265085103, + "learning_rate": 3.516841607689501e-06, + "loss": 4.276633262634277e-06, + "num_tokens": 251618.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 44 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 692.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 719.0, + "completions/max_terminated_length": 719.0, + "completions/mean_length": 692.75, + "completions/mean_terminated_length": 692.75, + "completions/min_length": 666.0, + "completions/min_terminated_length": 666.0, + "epoch": 0.45, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15349064767360687, + "kl": 7.098338755895384e-05, + "learning_rate": 3.436516483539781e-06, + "loss": 2.853572368621826e-06, + "num_tokens": 257493.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + 
"rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 45 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 576.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 678.0, + "completions/max_terminated_length": 678.0, + "completions/mean_length": 576.5, + "completions/mean_terminated_length": 576.5, + "completions/min_length": 451.0, + "completions/min_terminated_length": 451.0, + "epoch": 0.46, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.16658417880535126, + "kl": 0.00024796422985673416, + "learning_rate": 3.3550503583141726e-06, + "loss": 9.894371032714844e-06, + "num_tokens": 262907.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 46 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 679.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1068.0, + "completions/max_terminated_length": 1068.0, + "completions/mean_length": 679.75, + "completions/mean_terminated_length": 679.75, + "completions/min_length": 452.0, + 
"completions/min_terminated_length": 452.0, + "epoch": 0.47, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17304089665412903, + "kl": 0.00017170603496197145, + "learning_rate": 3.272542485937369e-06, + "loss": 6.839632987976074e-06, + "num_tokens": 268906.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 47 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 637.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 698.0, + "completions/max_terminated_length": 698.0, + "completions/mean_length": 637.25, + "completions/mean_terminated_length": 637.25, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.48, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00041183308348990977, + "kl": 0.00011487708798085805, + "learning_rate": 3.189093389542498e-06, + "loss": 4.5950837375130504e-06, + "num_tokens": 274563.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + 
"rewards/reward_os_mechanics/std": 0.0, + "step": 48 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 802.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1189.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 802.25, + "completions/mean_terminated_length": 802.25, + "completions/min_length": 501.0, + "completions/min_terminated_length": 501.0, + "epoch": 0.49, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17298302054405212, + "kl": 0.00021534036750381347, + "learning_rate": 3.1048047389991693e-06, + "loss": 8.627772331237793e-06, + "num_tokens": 281048.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 49 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 676.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 725.0, + "completions/max_terminated_length": 725.0, + "completions/mean_length": 676.0, + "completions/mean_terminated_length": 676.0, + "completions/min_length": 631.0, + "completions/min_terminated_length": 631.0, + "epoch": 0.5, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.13108237087726593, + "kl": 0.00016818426684039878, + "learning_rate": 3.019779227044398e-06, + "loss": 6.7427754402160645e-06, + "num_tokens": 
287032.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 50 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 636.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 682.0, + "completions/max_terminated_length": 682.0, + "completions/mean_length": 636.25, + "completions/mean_terminated_length": 636.25, + "completions/min_length": 609.0, + "completions/min_terminated_length": 609.0, + "epoch": 0.51, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0004512854793574661, + "kl": 0.00010897797619691119, + "learning_rate": 2.9341204441673267e-06, + "loss": 4.359118975116871e-06, + "num_tokens": 292677.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 51 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 701.75, + "completions/clipped_ratio": 0.0, + 
"completions/max_length": 836.0, + "completions/max_terminated_length": 836.0, + "completions/mean_length": 701.75, + "completions/mean_terminated_length": 701.75, + "completions/min_length": 576.0, + "completions/min_terminated_length": 576.0, + "epoch": 0.52, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12227238714694977, + "kl": 0.00010721037870098371, + "learning_rate": 2.847932752400164e-06, + "loss": 4.291534423828125e-06, + "num_tokens": 298760.0, + "reward": 0.15250001847743988, + "reward_std": 0.7683477997779846, + "rewards/reward_environment_execution/mean": -0.09750000387430191, + "rewards/reward_environment_execution/std": 0.2683747410774231, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 52 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 628.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 686.0, + "completions/max_terminated_length": 686.0, + "completions/mean_length": 628.25, + "completions/mean_terminated_length": 628.25, + "completions/min_length": 586.0, + "completions/min_terminated_length": 586.0, + "epoch": 0.53, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1427244395017624, + "kl": 0.00010186010331381112, + "learning_rate": 2.761321158169134e-06, + "loss": 4.112720489501953e-06, + "num_tokens": 304381.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + 
"rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 53 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 759.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 891.0, + "completions/max_terminated_length": 891.0, + "completions/mean_length": 759.25, + "completions/mean_terminated_length": 759.25, + "completions/min_length": 688.0, + "completions/min_terminated_length": 688.0, + "epoch": 0.54, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11026006191968918, + "kl": 0.00010889488567045191, + "learning_rate": 2.6743911843603134e-06, + "loss": 4.366040229797363e-06, + "num_tokens": 310514.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 54 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 503.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 611.0, + "completions/max_terminated_length": 611.0, + "completions/mean_length": 503.5, + "completions/mean_terminated_length": 503.5, + "completions/min_length": 390.0, + 
"completions/min_terminated_length": 390.0, + "epoch": 0.55, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14209793508052826, + "kl": 0.000171166546351742, + "learning_rate": 2.587248741756253e-06, + "loss": 6.854534149169922e-06, + "num_tokens": 315808.0, + "reward": -0.6150000095367432, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.36500000953674316, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": -0.32499998807907104, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.07500000298023224, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 55 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 627.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 742.0, + "completions/max_terminated_length": 742.0, + "completions/mean_length": 627.0, + "completions/mean_terminated_length": 627.0, + "completions/min_length": 511.0, + "completions/min_terminated_length": 511.0, + "epoch": 0.56, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14504601061344147, + "kl": 0.0003341738956805784, + "learning_rate": 2.5e-06, + "loss": 1.3366341590881348e-05, + "num_tokens": 321424.0, + "reward": -0.23249998688697815, + "reward_std": 0.886242151260376, + "rewards/reward_environment_execution/mean": -0.23250000178813934, + "rewards/reward_environment_execution/std": 0.3089093565940857, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + 
"rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 56 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 615.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 628.0, + "completions/max_terminated_length": 628.0, + "completions/mean_length": 615.5, + "completions/mean_terminated_length": 615.5, + "completions/min_length": 602.0, + "completions/min_terminated_length": 602.0, + "epoch": 0.57, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14204947650432587, + "kl": 0.00015341720063588582, + "learning_rate": 2.4127512582437486e-06, + "loss": 6.288290023803711e-06, + "num_tokens": 327142.0, + "reward": 0.6147500276565552, + "reward_std": 0.14949996769428253, + "rewards/reward_environment_execution/mean": 0.16474999487400055, + "rewards/reward_environment_execution/std": 0.24950000643730164, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.25, + "rewards/reward_investigation_quality/std": 0.10000000894069672, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 57 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 660.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 896.0, + "completions/max_terminated_length": 896.0, + "completions/mean_length": 660.0, + "completions/mean_terminated_length": 660.0, + "completions/min_length": 447.0, + "completions/min_terminated_length": 447.0, + "epoch": 0.58, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0004325012268964201, + "kl": 0.0001194007618323667, + "learning_rate": 2.325608815639687e-06, + "loss": 4.77603043691488e-06, + "num_tokens": 
333058.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 58 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 708.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 816.0, + "completions/max_terminated_length": 816.0, + "completions/mean_length": 708.5, + "completions/mean_terminated_length": 708.5, + "completions/min_length": 532.0, + "completions/min_terminated_length": 532.0, + "epoch": 0.59, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11153003573417664, + "kl": 0.0001295358106290223, + "learning_rate": 2.238678841830867e-06, + "loss": 5.230307579040527e-06, + "num_tokens": 339156.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 59 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 677.5, + "completions/clipped_ratio": 0.0, + 
"completions/max_length": 804.0, + "completions/max_terminated_length": 804.0, + "completions/mean_length": 677.5, + "completions/mean_terminated_length": 677.5, + "completions/min_length": 603.0, + "completions/min_terminated_length": 603.0, + "epoch": 0.6, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.11006420850753784, + "kl": 0.00012863795018347446, + "learning_rate": 2.1520672475998374e-06, + "loss": 5.163252353668213e-06, + "num_tokens": 344974.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 60 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 663.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 794.0, + "completions/max_terminated_length": 794.0, + "completions/mean_length": 663.0, + "completions/mean_terminated_length": 663.0, + "completions/min_length": 600.0, + "completions/min_terminated_length": 600.0, + "epoch": 0.61, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14184613525867462, + "kl": 0.00038980014687695075, + "learning_rate": 2.0658795558326745e-06, + "loss": 1.55717134475708e-05, + "num_tokens": 350734.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + 
"rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 61 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 660.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 827.0, + "completions/max_terminated_length": 827.0, + "completions/mean_length": 660.75, + "completions/mean_terminated_length": 660.75, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.62, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00037498201709240675, + "kl": 0.000125649748952128, + "learning_rate": 1.9802207729556023e-06, + "loss": 5.025989594287239e-06, + "num_tokens": 356657.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 62 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 476.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 645.0, + "completions/max_terminated_length": 645.0, + "completions/mean_length": 476.0, + "completions/mean_terminated_length": 476.0, + "completions/min_length": 255.0, + "completions/min_terminated_length": 255.0, + "epoch": 0.63, + 
"frac_reward_zero_std": 0.0, + "grad_norm": 0.20083816349506378, + "kl": 0.0002776872024696786, + "learning_rate": 1.895195261000831e-06, + "loss": 1.1064112186431885e-05, + "num_tokens": 361677.0, + "reward": 0.14749999344348907, + "reward_std": 0.7651306986808777, + "rewards/reward_environment_execution/mean": -0.10250000655651093, + "rewards/reward_environment_execution/std": 0.26537710428237915, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 63 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 634.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 855.0, + "completions/max_terminated_length": 855.0, + "completions/mean_length": 634.25, + "completions/mean_terminated_length": 634.25, + "completions/min_length": 506.0, + "completions/min_terminated_length": 506.0, + "epoch": 0.64, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12447395920753479, + "kl": 0.00040143656588043086, + "learning_rate": 1.8109066104575023e-06, + "loss": 1.6085803508758545e-05, + "num_tokens": 367322.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + 
"rewards/reward_os_mechanics/std": 0.0, + "step": 64 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 641.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 835.0, + "completions/max_terminated_length": 835.0, + "completions/mean_length": 641.75, + "completions/mean_terminated_length": 641.75, + "completions/min_length": 520.0, + "completions/min_terminated_length": 520.0, + "epoch": 0.65, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.19145435094833374, + "kl": 0.00024168194067897275, + "learning_rate": 1.7274575140626318e-06, + "loss": 9.670853614807129e-06, + "num_tokens": 372989.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 65 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 433.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 478.0, + "completions/max_terminated_length": 478.0, + "completions/mean_length": 433.5, + "completions/mean_terminated_length": 433.5, + "completions/min_length": 400.0, + "completions/min_terminated_length": 400.0, + "epoch": 0.66, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1740998774766922, + "kl": 0.0004632533964468166, + "learning_rate": 1.6449496416858285e-06, + "loss": 1.8537044525146484e-05, + "num_tokens": 377831.0, 
+ "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + "rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 66 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 429.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 696.0, + "completions/max_terminated_length": 696.0, + "completions/mean_length": 429.5, + "completions/mean_terminated_length": 429.5, + "completions/min_length": 228.0, + "completions/min_terminated_length": 228.0, + "epoch": 0.67, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.229574516415596, + "kl": 0.0006340235704556108, + "learning_rate": 1.56348351646022e-06, + "loss": 2.5369226932525635e-05, + "num_tokens": 382649.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 67 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 
655.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 870.0, + "completions/max_terminated_length": 870.0, + "completions/mean_length": 655.25, + "completions/mean_terminated_length": 655.25, + "completions/min_length": 544.0, + "completions/min_terminated_length": 544.0, + "epoch": 0.68, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15165656805038452, + "kl": 0.00021119948905834462, + "learning_rate": 1.4831583923105e-06, + "loss": 8.471310138702393e-06, + "num_tokens": 388546.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 68 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 611.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 764.0, + "completions/max_terminated_length": 764.0, + "completions/mean_length": 611.75, + "completions/mean_terminated_length": 611.75, + "completions/min_length": 539.0, + "completions/min_terminated_length": 539.0, + "epoch": 0.69, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.1276257336139679, + "kl": 0.00015681719924032222, + "learning_rate": 1.4040721330273063e-06, + "loss": 6.2659382820129395e-06, + "num_tokens": 394097.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + 
"rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 69 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 742.25, + "completions/clipped_ratio": 0.0, + "completions/max_length": 948.0, + "completions/max_terminated_length": 948.0, + "completions/mean_length": 742.25, + "completions/mean_terminated_length": 742.25, + "completions/min_length": 557.0, + "completions/min_terminated_length": 557.0, + "epoch": 0.7, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.10219409316778183, + "kl": 0.00011395674209779827, + "learning_rate": 1.3263210930352737e-06, + "loss": 4.589557647705078e-06, + "num_tokens": 400162.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 70 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 575.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 610.0, + "completions/max_terminated_length": 610.0, + "completions/mean_length": 575.75, + "completions/mean_terminated_length": 
575.75, + "completions/min_length": 538.0, + "completions/min_terminated_length": 538.0, + "epoch": 0.71, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.14183653891086578, + "kl": 0.00029851996441720985, + "learning_rate": 1.2500000000000007e-06, + "loss": 1.1943280696868896e-05, + "num_tokens": 405581.0, + "reward": 0.1550000160932541, + "reward_std": 0.7699999809265137, + "rewards/reward_environment_execution/mean": -0.0949999988079071, + "rewards/reward_environment_execution/std": 0.26999998092651367, + "rewards/reward_format_compliance/mean": 0.02499999850988388, + "rewards/reward_format_compliance/std": 0.3499999940395355, + "rewards/reward_investigation_quality/mean": 0.22500000894069672, + "rewards/reward_investigation_quality/std": 0.15000002086162567, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 71 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 647.75, + "completions/clipped_ratio": 0.0, + "completions/max_length": 970.0, + "completions/max_terminated_length": 970.0, + "completions/mean_length": 647.75, + "completions/mean_terminated_length": 647.75, + "completions/min_length": 429.0, + "completions/min_terminated_length": 429.0, + "epoch": 0.72, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.15262170135974884, + "kl": 0.0001618979367776774, + "learning_rate": 1.1752018394169882e-06, + "loss": 6.4820051193237305e-06, + "num_tokens": 411464.0, + "reward": -0.22999998927116394, + "reward_std": 0.8891193866729736, + "rewards/reward_environment_execution/mean": -0.23000000417232513, + "rewards/reward_environment_execution/std": 0.31176915764808655, + "rewards/reward_format_compliance/mean": -0.15000000596046448, + "rewards/reward_format_compliance/std": 0.404145210981369, + "rewards/reward_investigation_quality/mean": 0.15000000596046448, + 
"rewards/reward_investigation_quality/std": 0.17320507764816284, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 72 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 533.5, + "completions/clipped_ratio": 0.0, + "completions/max_length": 845.0, + "completions/max_terminated_length": 845.0, + "completions/mean_length": 533.5, + "completions/mean_terminated_length": 533.5, + "completions/min_length": 328.0, + "completions/min_terminated_length": 328.0, + "epoch": 0.73, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.00044858516776002944, + "kl": 0.00018171430565416813, + "learning_rate": 1.1020177413231334e-06, + "loss": 7.26857251720503e-06, + "num_tokens": 416698.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 73 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 492.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 612.0, + "completions/max_terminated_length": 612.0, + "completions/mean_length": 492.0, + "completions/mean_terminated_length": 492.0, + "completions/min_length": 387.0, + "completions/min_terminated_length": 387.0, + "epoch": 0.74, + "frac_reward_zero_std": 1.0, + "grad_norm": 0.0007429426186718047, + "kl": 0.00022592291861656122, + "learning_rate": 1.0305368692688175e-06, + "loss": 
9.036917617777362e-06, + "num_tokens": 421782.0, + "reward": 0.5400000214576721, + "reward_std": 0.0, + "rewards/reward_environment_execution/mean": 0.03999999910593033, + "rewards/reward_environment_execution/std": 0.0, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.30000001192092896, + "rewards/reward_investigation_quality/std": 0.0, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 74 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 735.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 813.0, + "completions/max_terminated_length": 813.0, + "completions/mean_length": 735.0, + "completions/mean_terminated_length": 735.0, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.75, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.12998387217521667, + "kl": 0.0001630033293622546, + "learning_rate": 9.608463116858544e-07, + "loss": 6.616115570068359e-06, + "num_tokens": 427826.0, + "reward": 0.6147500276565552, + "reward_std": 0.14949996769428253, + "rewards/reward_environment_execution/mean": 0.16474999487400055, + "rewards/reward_environment_execution/std": 0.24950000643730164, + "rewards/reward_format_compliance/mean": 0.20000000298023224, + "rewards/reward_format_compliance/std": 0.0, + "rewards/reward_investigation_quality/mean": 0.25, + "rewards/reward_investigation_quality/std": 0.10000000894069672, + "rewards/reward_os_mechanics/mean": 0.0, + "rewards/reward_os_mechanics/std": 0.0, + "step": 75 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_input_tokens_seen": 427826, + "num_train_epochs": 1, + "save_steps": 25, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + 
"should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-75/training_args.bin b/checkpoint-75/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a152a78f906cd195d8d8231bc4b418c49eedd15 --- /dev/null +++ b/checkpoint-75/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9107f9f9e124fcdfbf6ef3ddbf42619d99c323466de7a60f15005895e3f3aa4b +size 6673 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5b9e375b2b4e8c4210d9d8a8a8d0642d1f715076 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65c6c5f9764771aa485e6a1f5e63d7d9af8477fe0777148c17476ecb2e09a05 +size 17210099 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e96441cbbe4f2fa6e57ae737bc2816d2e37a30ed --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2068 @@ +{ + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "from_slow": true, + "is_local": false, + "legacy": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "left", + "tokenizer_class": "TokenizersBackend", + "unk_token": null, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128002": { + "content": 
"<|reserved_special_token_0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "single_word": 
false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", 
+ "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128050": { + "content": 
"<|reserved_special_token_42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + 
"128062": { + "content": "<|reserved_special_token_54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128121": { + "content": 
"<|reserved_special_token_113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128133": { + "content": "<|reserved_special_token_125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128180": { + "content": 
"<|reserved_special_token_172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128192": { + "content": "<|reserved_special_token_184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128239": { + "content": 
"<|reserved_special_token_231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + "128251": { + "content": "<|reserved_special_token_243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + } +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a152a78f906cd195d8d8231bc4b418c49eedd15 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9107f9f9e124fcdfbf6ef3ddbf42619d99c323466de7a60f15005895e3f3aa4b +size 6673 diff --git a/training_curves.png b/training_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..af33a17b3be5693c991ea2580ea4e451ffa42cfd --- /dev/null +++ b/training_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2432879842a13ed8fac779819fed0ee240405c7245aad83a231ce6a9f48c8e74 +size 126073